From de3cfa2c9823a7e0391d4f4a355e2d78b83eec62 Mon Sep 17 00:00:00 2001 From: Intel Date: Mon, 3 Jun 2013 00:00:00 +0000 Subject: [PATCH] sched: initial import Signed-off-by: Intel --- app/test/Makefile | 2 + app/test/autotest_data.py | 16 +- app/test/commands.c | 6 +- app/test/test.h | 2 + app/test/test_red.c | 1890 ++++++++++++++++ app/test/test_sched.c | 244 ++ config/defconfig_i686-default-linuxapp-gcc | 10 + config/defconfig_i686-default-linuxapp-icc | 10 + config/defconfig_x86_64-default-linuxapp-gcc | 11 + config/defconfig_x86_64-default-linuxapp-icc | 10 + examples/qos_sched/Makefile | 58 + examples/qos_sched/app_thread.c | 302 +++ examples/qos_sched/args.c | 467 ++++ examples/qos_sched/cfg_file.c | 631 ++++++ examples/qos_sched/cfg_file.h | 103 + examples/qos_sched/init.c | 385 ++++ examples/qos_sched/main.c | 246 ++ examples/qos_sched/main.h | 186 ++ examples/qos_sched/profile.cfg | 109 + lib/Makefile | 1 + lib/librte_eal/common/include/rte_log.h | 1 + lib/librte_mbuf/rte_mbuf.h | 1 + lib/librte_sched/Makefile | 56 + lib/librte_sched/rte_approx.c | 197 ++ lib/librte_sched/rte_approx.h | 76 + lib/librte_sched/rte_bitmap.h | 505 +++++ lib/librte_sched/rte_red.c | 160 ++ lib/librte_sched/rte_red.h | 454 ++++ lib/librte_sched/rte_sched.c | 2129 ++++++++++++++++++ lib/librte_sched/rte_sched.h | 446 ++++ lib/librte_sched/rte_sched_common.h | 130 ++ mk/rte.app.mk | 6 + 32 files changed, 8847 insertions(+), 3 deletions(-) create mode 100644 app/test/test_red.c create mode 100755 app/test/test_sched.c create mode 100755 examples/qos_sched/Makefile create mode 100755 examples/qos_sched/app_thread.c create mode 100755 examples/qos_sched/args.c create mode 100755 examples/qos_sched/cfg_file.c create mode 100755 examples/qos_sched/cfg_file.h create mode 100755 examples/qos_sched/init.c create mode 100755 examples/qos_sched/main.c create mode 100755 examples/qos_sched/main.h create mode 100644 examples/qos_sched/profile.cfg create mode 100644 lib/librte_sched/Makefile create mode 100644 lib/librte_sched/rte_approx.c create mode 100644 lib/librte_sched/rte_approx.h create mode 100644 lib/librte_sched/rte_bitmap.h create mode 100644 lib/librte_sched/rte_red.c create mode 100644 lib/librte_sched/rte_red.h create mode 100644 lib/librte_sched/rte_sched.c create mode 100644 lib/librte_sched/rte_sched.h create mode 100644 lib/librte_sched/rte_sched_common.h diff --git a/app/test/Makefile b/app/test/Makefile index 6ba18e44e2..4fecdb6258 100755 --- a/app/test/Makefile +++ b/app/test/Makefile @@ -85,6 +85,8 @@ SRCS-$(CONFIG_RTE_APP_TEST) += test_cmdline_ipaddr.c SRCS-$(CONFIG_RTE_APP_TEST) += test_cmdline_cirbuf.c SRCS-$(CONFIG_RTE_APP_TEST) += test_cmdline_string.c SRCS-$(CONFIG_RTE_APP_TEST) += test_cmdline_lib.c +SRCS-$(CONFIG_RTE_APP_TEST) += test_red.c +SRCS-$(CONFIG_RTE_APP_TEST) += test_sched.c SRCS-$(CONFIG_RTE_APP_TEST) += test_meter.c SRCS-$(CONFIG_RTE_APP_TEST) += test_pmac_pm.c SRCS-$(CONFIG_RTE_APP_TEST) += test_pmac_acl.c diff --git a/app/test/autotest_data.py b/app/test/autotest_data.py index 9bd436bf9c..f2f9965d31 100755 --- a/app/test/autotest_data.py +++ b/app/test/autotest_data.py @@ -271,7 +271,7 @@ parallel_test_group_list = [ }, { "Prefix": "group_6", - "Memory" : all_sockets(588), + "Memory" : all_sockets(600), "Tests" : [ { @@ -297,7 +297,13 @@ parallel_test_group_list = [ "Command" : "prefetch_autotest", "Func" : default_autotest, "Report" : None, - }, + }, + { + "Name" :"Red autotest", + "Command" : "red_autotest", + "Func" :default_autotest, + "Report" :None, + }, ] }, { @@ 
-317,6 +323,12 @@ parallel_test_group_list = [ "Func" : default_autotest, "Report" : None, }, + { + "Name" :"Sched autotest", + "Command" : "sched_autotest", + "Func" :default_autotest, + "Report" :None, + }, ] }, ] diff --git a/app/test/commands.c b/app/test/commands.c index 2438433cb6..c7ac1e423a 100755 --- a/app/test/commands.c +++ b/app/test/commands.c @@ -167,6 +167,10 @@ static void cmd_autotest_parsed(void *parsed_result, ret |= test_memcpy_perf(); if (all || !strcmp(res->autotest, "func_reentrancy_autotest")) ret |= test_func_reentrancy(); + if (all || !strcmp(res->autotest, "red_autotest")) + ret |= test_red(); + if (all || !strcmp(res->autotest, "sched_autotest")) + ret |= test_sched(); if (all || !strcmp(res->autotest, "meter_autotest")) ret |= test_meter(); if (all || !strcmp(res->autotest, "pm_autotest")) @@ -203,7 +207,7 @@ cmdline_parse_token_string_t cmd_autotest_autotest = "version_autotest#eal_fs_autotest#" "cmdline_autotest#func_reentrancy_autotest#" "mempool_perf_autotest#hash_perf_autotest#" - "meter_autotest#" + "red_autotest#meter_autotest#sched_autotest#" "memcpy_perf_autotest#pm_autotest#" "acl_autotest#power_autotest#" "all_autotests"); diff --git a/app/test/test.h b/app/test/test.h index 75df8d0064..6bac20943f 100755 --- a/app/test/test.h +++ b/app/test/test.h @@ -84,6 +84,8 @@ int test_version(void); int test_eal_fs(void); int test_cmdline(void); int test_func_reentrancy(void); +int test_red(void); +int test_sched(void); int test_meter(void); int test_pmac_pm(void); int test_pmac_acl(void); diff --git a/app/test/test_red.c b/app/test/test_red.c new file mode 100644 index 0000000000..f0833494a5 --- /dev/null +++ b/app/test/test_red.c @@ -0,0 +1,1890 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2013 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ *
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/time.h>
+#include <time.h>
+#include <math.h>
+
+#include "test.h"
+
+#ifdef RTE_LIBRTE_SCHED
+
+#include <rte_red.h>
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:2259) /* conversion may lose significant bits */
+#pragma warning(disable:181) /* Arg incompatible with format string */
+#endif
+
+#define DIM(x) (sizeof(x)/sizeof(x[0]))
+#define TEST_HZ_PER_KHZ 1000
+#define TEST_NSEC_MARGIN 500 /**< nanosecond margin when calculating clk freq */
+
+#define MAX_QEMPTY_TIME_MSEC 50000
+#define MSEC_PER_SEC 1000 /**< Milli-seconds per second */
+#define USEC_PER_MSEC 1000 /**< Micro-seconds per milli-second */
+#define USEC_PER_SEC 1000000 /**< Micro-seconds per second */
+
+/** structures for testing rte_red performance and function */
+struct test_rte_red_config { /**< Test structure for RTE_RED config */
+	struct rte_red_config *rconfig; /**< RTE_RED configuration parameters */
+	uint8_t num_cfg;                /**< Number of RTE_RED configs to test */
+	uint8_t *wq_log2;               /**< Test wq_log2 value to use */
+	uint32_t min_th;                /**< Queue minimum threshold */
+	uint32_t max_th;                /**< Queue maximum threshold */
+	uint8_t *maxp_inv;              /**< Inverse mark probability */
+};
+
+struct test_queue { /**< Test structure for RTE_RED Queues */
+	struct rte_red *rdata;          /**< RTE_RED runtime data */
+	uint32_t num_queues;            /**< Number of RTE_RED queues to test */
+	uint32_t *qconfig;              /**< Configuration of RTE_RED queues for test */
+	uint32_t *q;                    /**< Queue size */
+	uint32_t q_ramp_up;             /**< Num of enqueues to ramp up the queue */
+	uint32_t avg_ramp_up;           /**< Average num of enqueues to ramp up the queue */
+	uint32_t avg_tolerance;         /**< Tolerance in queue average */
+	double drop_tolerance;          /**< Drop tolerance of packets not enqueued */
+};
+
+struct test_var { /**< Test variables used for testing RTE_RED */
+	uint32_t wait_usec;             /**< Micro second wait interval */
+	uint32_t num_iterations;        /**< Number of test iterations */
+	uint32_t num_ops;               /**< Number of test operations */
+	uint64_t clk_freq;              /**< CPU clock frequency */
+	uint32_t sleep_sec;             /**< Seconds to sleep */
+	uint32_t *dropped;              /**< Test operations dropped */
+	uint32_t *enqueued;             /**< Test operations enqueued */
+};
+
+struct test_config { /**< Master test structure for RTE_RED */
+	const char *ifname;             /**< Interface name */
+	const char *msg;                /**< Test message for display */
+	const char *htxt;               /**< Header txt display for result output */
+	struct test_rte_red_config *tconfig; /**< Test structure for RTE_RED config */
+	struct test_queue *tqueue;      /**< Test structure for RTE_RED Queues */
+	struct test_var *tvar;          /**< Test variables used for testing RTE_RED */
+	uint32_t *tlevel;               /**< Queue levels */
+};
+
+enum test_result {
+	FAIL = 0,
+	PASS
+};
+
+/** Test structure to define tests to run */
+struct tests {
+	struct test_config *testcfg;
+	enum test_result (*testfn)(struct test_config *);
+};
+
+struct rdtsc_prof {
+	uint64_t clk_start;
+	uint64_t clk_min;               /**< min clocks */
+	uint64_t clk_max;               /**< max clocks */
+	uint64_t clk_avgc;              /**< count to calc average */
+	double clk_avg;                 /**< cumulative sum to calc average */
+	const char *name;
+};
+
+static const uint64_t port_speed_bytes = (10ULL*1000ULL*1000ULL*1000ULL)/8ULL;
+static double inv_cycles_per_byte = 0;
+static double pkt_time_usec = 0;
+
+static void init_port_ts(uint64_t cpu_clock)
+{
+	double cycles_per_byte = (double)(cpu_clock) / (double)(port_speed_bytes);
+	inv_cycles_per_byte = 1.0 / cycles_per_byte;
+	pkt_time_usec = 1000000.0 /
((double)port_speed_bytes / (double)RTE_RED_S); +} + +static uint64_t get_port_ts(void) +{ + return (uint64_t)((double)rte_rdtsc() * inv_cycles_per_byte); +} + +static void rdtsc_prof_init(struct rdtsc_prof *p, const char *name) +{ + p->clk_min = (uint64_t)(-1LL); + p->clk_max = 0; + p->clk_avg = 0; + p->clk_avgc = 0; + p->name = name; +} + +static inline void rdtsc_prof_start(struct rdtsc_prof *p) +{ + asm( "cpuid" : : : "%eax", "%ebx", "%ecx", "%edx" ); + p->clk_start = rte_rdtsc(); +} + +static inline void rdtsc_prof_end(struct rdtsc_prof *p) +{ + uint64_t clk_start = rte_rdtsc() - p->clk_start; + + p->clk_avgc++; + p->clk_avg += (double) clk_start; + + if (clk_start > p->clk_max) + p->clk_max = clk_start; + if (clk_start < p->clk_min) + p->clk_min = clk_start; +} + +static void rdtsc_prof_print(struct rdtsc_prof *p) +{ + if (p->clk_avgc>0) { + printf("RDTSC stats for %s: n=%" PRIu64 ", min=%" PRIu64 ", max=%" PRIu64 ", avg=%.1f\n", + p->name, + p->clk_avgc, + p->clk_min, + p->clk_max, + (p->clk_avg / ((double) p->clk_avgc))); + } +} + +static uint32_t rte_red_get_avg_int(const struct rte_red_config *red_cfg, + struct rte_red *red) +{ + /** + * scale by 1/n and convert from fixed-point to integer + */ + return red->avg >> (RTE_RED_SCALING + red_cfg->wq_log2); +} + +static double rte_red_get_avg_float(const struct rte_red_config *red_cfg, + struct rte_red *red) +{ + /** + * scale by 1/n and convert from fixed-point to floating-point + */ + return ldexp((double)red->avg, -(RTE_RED_SCALING + red_cfg->wq_log2)); +} + +static void rte_red_set_avg_int(const struct rte_red_config *red_cfg, + struct rte_red *red, + uint32_t avg) +{ + /** + * scale by n and convert from integer to fixed-point + */ + red->avg = avg << (RTE_RED_SCALING + red_cfg->wq_log2); +} + +static double calc_exp_avg_on_empty(double avg, uint32_t n, uint32_t time_diff) +{ + return avg * pow((1.0 - 1.0 / (double)n), (double)time_diff / pkt_time_usec); +} + +static double calc_drop_rate(uint32_t enqueued, uint32_t dropped) +{ + return (double)dropped / ((double)enqueued + (double)dropped); +} + +/** + * calculate the drop probability + */ +static double calc_drop_prob(uint32_t min_th, uint32_t max_th, + uint32_t maxp_inv, uint32_t avg) +{ + double drop_prob = 0.0; + + if (avg < min_th) { + drop_prob = 0.0; + } else if (avg < max_th) { + drop_prob = (1.0 / (double)maxp_inv) + * ((double)(avg - min_th) + / (double)(max_th - min_th)); + } else { + drop_prob = 1.0; + } + return (drop_prob); +} + +/** + * check if drop rate matches drop probability within tolerance + */ +static int check_drop_rate(double *diff, double drop_rate, double drop_prob, double tolerance) +{ + double abs_diff = 0.0; + int ret = 1; + + abs_diff = fabs(drop_rate - drop_prob); + if ((int)abs_diff == 0) { + *diff = 0.0; + } else { + *diff = (abs_diff / drop_prob) * 100.0; + if (*diff > tolerance) { + ret = 0; + } + } + return (ret); +} + +/** + * check if average queue size is within tolerance + */ +static int check_avg(double *diff, double avg, double exp_avg, double tolerance) +{ + double abs_diff = 0.0; + int ret = 1; + + abs_diff = fabs(avg - exp_avg); + if ((int)abs_diff == 0) { + *diff = 0.0; + } else { + *diff = (abs_diff / exp_avg) * 100.0; + if (*diff > tolerance) { + ret = 0; + } + } + return (ret); +} + +/** + * get the clk frequency in Hz + */ +static uint64_t get_machclk_freq(void) +{ + uint64_t start = 0; + uint64_t end = 0; + uint64_t diff = 0; + uint64_t clk_freq_hz = 0; + struct timespec tv_start = {0, 0}, tv_end = {0, 0}; + struct timespec req 
= {0, 0}; + + req.tv_sec = 1; + req.tv_nsec = 0; + + clock_gettime(CLOCK_REALTIME, &tv_start); + start = rte_rdtsc(); + + if (nanosleep(&req, NULL) != 0) { + perror("get_machclk_freq()"); + exit(EXIT_FAILURE); + } + + clock_gettime(CLOCK_REALTIME, &tv_end); + end = rte_rdtsc(); + + diff = (uint64_t)(tv_end.tv_sec - tv_start.tv_sec) * USEC_PER_SEC + + ((tv_end.tv_nsec - tv_start.tv_nsec + TEST_NSEC_MARGIN) / + USEC_PER_MSEC); /**< diff is in micro secs */ + + if (diff == 0) + return(0); + + clk_freq_hz = ((end - start) * USEC_PER_SEC / diff); + return (clk_freq_hz); +} + +/** + * initialize the test rte_red config + */ +static enum test_result +test_rte_red_init(struct test_config *tcfg) +{ + unsigned i = 0; + + tcfg->tvar->clk_freq = get_machclk_freq(); + init_port_ts( tcfg->tvar->clk_freq ); + + for (i = 0; i < tcfg->tconfig->num_cfg; i++) { + if (rte_red_config_init(&tcfg->tconfig->rconfig[i], + (uint16_t)tcfg->tconfig->wq_log2[i], + (uint16_t)tcfg->tconfig->min_th, + (uint16_t)tcfg->tconfig->max_th, + (uint16_t)tcfg->tconfig->maxp_inv[i]) != 0) { + return(FAIL); + } + } + + *tcfg->tqueue->q = 0; + *tcfg->tvar->dropped = 0; + *tcfg->tvar->enqueued = 0; + return(PASS); +} + +/** + * enqueue until actual queue size reaches target level + */ +static int +increase_actual_qsize(struct rte_red_config *red_cfg, + struct rte_red *red, + uint32_t *q, + uint32_t level, + uint32_t attempts) +{ + uint32_t i = 0; + + for (i = 0; i < attempts; i++) { + int ret = 0; + + /** + * enqueue + */ + ret = rte_red_enqueue(red_cfg, red, *q, get_port_ts() ); + if (ret == 0) { + if (++(*q) >= level) + break; + } + } + /** + * check if target actual queue size has been reached + */ + if (*q != level) + return (-1); + /** + * success + */ + return (0); +} + +/** + * enqueue until average queue size reaches target level + */ +static int +increase_average_qsize(struct rte_red_config *red_cfg, + struct rte_red *red, + uint32_t *q, + uint32_t level, + uint32_t num_ops) +{ + uint32_t avg = 0; + uint32_t i = 0; + + for (i = 0; i < num_ops; i++) { + /** + * enqueue + */ + rte_red_enqueue(red_cfg, red, *q, get_port_ts()); + } + /** + * check if target average queue size has been reached + */ + avg = rte_red_get_avg_int(red_cfg, red); + if (avg != level) + return (-1); + /** + * success + */ + return (0); +} + +/** + * setup default values for the functional test structures + */ +static struct rte_red_config ft_wrconfig[1]; +static struct rte_red ft_rtdata[1]; +static uint8_t ft_wq_log2[] = {9}; +static uint8_t ft_maxp_inv[] = {10}; +static uint32_t ft_qconfig[] = {0, 0, 1, 1}; +static uint32_t ft_q[] ={0}; +static uint32_t ft_dropped[] ={0}; +static uint32_t ft_enqueued[] ={0}; + +static struct test_rte_red_config ft_tconfig = { + .rconfig = ft_wrconfig, + .num_cfg = DIM(ft_wrconfig), + .wq_log2 = ft_wq_log2, + .min_th = 32, + .max_th = 128, + .maxp_inv = ft_maxp_inv, +}; + +static struct test_queue ft_tqueue = { + .rdata = ft_rtdata, + .num_queues = DIM(ft_rtdata), + .qconfig = ft_qconfig, + .q = ft_q, + .q_ramp_up = 1000000, + .avg_ramp_up = 1000000, + .avg_tolerance = 5, /* 5 percent */ + .drop_tolerance = 50, /* 50 percent */ +}; + +static struct test_var ft_tvar = { + .wait_usec = 250000, + .num_iterations = 20, + .num_ops = 10000, + .clk_freq = 0, + .dropped = ft_dropped, + .enqueued = ft_enqueued, + .sleep_sec = (MAX_QEMPTY_TIME_MSEC / MSEC_PER_SEC) + 2, +}; + +/** + * functional test enqueue/dequeue packets + */ +static void enqueue_dequeue_func(struct rte_red_config *red_cfg, + struct rte_red *red, + uint32_t *q, 
+ uint32_t num_ops, + uint32_t *enqueued, + uint32_t *dropped) +{ + uint32_t i = 0; + + for (i = 0; i < num_ops; i++) { + int ret = 0; + + /** + * enqueue + */ + ret = rte_red_enqueue(red_cfg, red, *q, get_port_ts()); + if (ret == 0) + (*enqueued)++; + else + (*dropped)++; + } +} + +/** + * Test F1: functional test 1 + */ +static uint32_t ft1_tlevels[] = {6, 12, 18, 24, 30, 36, 42, 48, 54, 60, 66, 72, 78, 84, 90, 96, 102, 108, 114, 120, 126, 132, 138, 144}; + +static struct test_config func_test1_config = { + .ifname = "functional test 1 interface", + .msg = "functional test 1 : use one rte_red configuration,\n" + " increase average queue size to various levels,\n" + " compare drop rate to drop probability\n\n", + .htxt = " " + "avg queue size " + "enqueued " + "dropped " + "drop prob % " + "drop rate % " + "diff % " + "tolerance % " + "\n", + .tconfig = &ft_tconfig, + .tqueue = &ft_tqueue, + .tvar = &ft_tvar, + .tlevel = ft1_tlevels, +}; + +static enum test_result func_test1(struct test_config *tcfg) +{ + enum test_result result = PASS; + uint32_t i = 0; + + printf("%s", tcfg->msg); + + if (test_rte_red_init(tcfg) != PASS) { + result = FAIL; + goto out; + } + + printf("%s", tcfg->htxt); + + for (i = 0; i < DIM(ft1_tlevels); i++) { + const char *label = NULL; + uint32_t avg = 0; + double drop_rate = 0.0; + double drop_prob = 0.0; + double diff = 0.0; + + /** + * reset rte_red run-time data + */ + rte_red_rt_data_init(tcfg->tqueue->rdata); + *tcfg->tvar->enqueued = 0; + *tcfg->tvar->dropped = 0; + + if (increase_actual_qsize(tcfg->tconfig->rconfig, + tcfg->tqueue->rdata, + tcfg->tqueue->q, + tcfg->tlevel[i], + tcfg->tqueue->q_ramp_up) != 0) { + result = FAIL; + goto out; + } + + if (increase_average_qsize(tcfg->tconfig->rconfig, + tcfg->tqueue->rdata, + tcfg->tqueue->q, + tcfg->tlevel[i], + tcfg->tqueue->avg_ramp_up) != 0) { + result = FAIL; + goto out; + } + + enqueue_dequeue_func(tcfg->tconfig->rconfig, + tcfg->tqueue->rdata, + tcfg->tqueue->q, + tcfg->tvar->num_ops, + tcfg->tvar->enqueued, + tcfg->tvar->dropped); + + avg = rte_red_get_avg_int(tcfg->tconfig->rconfig, tcfg->tqueue->rdata); + if (avg != tcfg->tlevel[i]) { + fprintf(stderr, "Fail: avg != level\n"); + result = FAIL; + } + + drop_rate = calc_drop_rate(*tcfg->tvar->enqueued, *tcfg->tvar->dropped); + drop_prob = calc_drop_prob(tcfg->tconfig->min_th, tcfg->tconfig->max_th, + *tcfg->tconfig->maxp_inv, tcfg->tlevel[i]); + if (!check_drop_rate(&diff, drop_rate, drop_prob, (double)tcfg->tqueue->drop_tolerance)) + result = FAIL; + + if (tcfg->tlevel[i] == tcfg->tconfig->min_th) + label = "min thresh: "; + else if (tcfg->tlevel[i] == tcfg->tconfig->max_th) + label = "max thresh: "; + else + label = " "; + printf("%s%-15u%-15u%-15u%-15.4lf%-15.4lf%-15.4lf%-15.4lf\n", + label, avg, *tcfg->tvar->enqueued, *tcfg->tvar->dropped, + drop_prob * 100.0, drop_rate * 100.0, diff, + (double)tcfg->tqueue->drop_tolerance); + } +out: + return (result); +} + +/** + * Test F2: functional test 2 + */ +static uint32_t ft2_tlevel[] = {127}; +static uint8_t ft2_wq_log2[] = {9, 9, 9, 9, 9, 9, 9, 9, 9, 9}; +static uint8_t ft2_maxp_inv[] = {10, 20, 30, 40, 50, 60, 70, 80, 90, 100}; +static struct rte_red_config ft2_rconfig[10]; + +static struct test_rte_red_config ft2_tconfig = { + .rconfig = ft2_rconfig, + .num_cfg = DIM(ft2_rconfig), + .wq_log2 = ft2_wq_log2, + .min_th = 32, + .max_th = 128, + .maxp_inv = ft2_maxp_inv, +}; + +static struct test_config func_test2_config = { + .ifname = "functional test 2 interface", + .msg = "functional test 2 : use several 
RED configurations,\n" + " increase average queue size to just below maximum threshold,\n" + " compare drop rate to drop probability\n\n", + .htxt = "RED config " + "avg queue size " + "min threshold " + "max threshold " + "drop prob % " + "drop rate % " + "diff % " + "tolerance % " + "\n", + .tconfig = &ft2_tconfig, + .tqueue = &ft_tqueue, + .tvar = &ft_tvar, + .tlevel = ft2_tlevel, +}; + +static enum test_result func_test2(struct test_config *tcfg) +{ + enum test_result result = PASS; + double prev_drop_rate = 1.0; + uint32_t i = 0; + + printf("%s", tcfg->msg); + + if (test_rte_red_init(tcfg) != PASS) { + result = FAIL; + goto out; + } + rte_red_rt_data_init(tcfg->tqueue->rdata); + + if (increase_actual_qsize(tcfg->tconfig->rconfig, + tcfg->tqueue->rdata, + tcfg->tqueue->q, + *tcfg->tlevel, + tcfg->tqueue->q_ramp_up) != 0) { + result = FAIL; + goto out; + } + + if (increase_average_qsize(tcfg->tconfig->rconfig, + tcfg->tqueue->rdata, + tcfg->tqueue->q, + *tcfg->tlevel, + tcfg->tqueue->avg_ramp_up) != 0) { + result = FAIL; + goto out; + } + printf("%s", tcfg->htxt); + + for (i = 0; i < tcfg->tconfig->num_cfg; i++) { + uint32_t avg = 0; + double drop_rate = 0.0; + double drop_prob = 0.0; + double diff = 0.0; + + *tcfg->tvar->dropped = 0; + *tcfg->tvar->enqueued = 0; + + enqueue_dequeue_func(&tcfg->tconfig->rconfig[i], + tcfg->tqueue->rdata, + tcfg->tqueue->q, + tcfg->tvar->num_ops, + tcfg->tvar->enqueued, + tcfg->tvar->dropped); + + avg = rte_red_get_avg_int(&tcfg->tconfig->rconfig[i], tcfg->tqueue->rdata); + if (avg != *tcfg->tlevel) + result = FAIL; + + drop_rate = calc_drop_rate(*tcfg->tvar->enqueued, *tcfg->tvar->dropped); + drop_prob = calc_drop_prob(tcfg->tconfig->min_th, tcfg->tconfig->max_th, + tcfg->tconfig->maxp_inv[i], *tcfg->tlevel); + if (!check_drop_rate(&diff, drop_rate, drop_prob, (double)tcfg->tqueue->drop_tolerance)) + result = FAIL; + /** + * drop rate should decrease as maxp_inv increases + */ + if (drop_rate > prev_drop_rate) + result = FAIL; + prev_drop_rate = drop_rate; + + printf("%-15u%-15u%-15u%-15u%-15.4lf%-15.4lf%-15.4lf%-15.4lf\n", + i, avg, tcfg->tconfig->min_th, tcfg->tconfig->max_th, + drop_prob * 100.0, drop_rate * 100.0, diff, + (double)tcfg->tqueue->drop_tolerance); + } +out: + return (result); +} + +/** + * Test F3: functional test 3 + */ +static uint32_t ft3_tlevel[] = {1022}; + +static struct test_rte_red_config ft3_tconfig = { + .rconfig = ft_wrconfig, + .num_cfg = DIM(ft_wrconfig), + .wq_log2 = ft_wq_log2, + .min_th = 32, + .max_th = 1023, + .maxp_inv = ft_maxp_inv, +}; + +static struct test_config func_test3_config = { + .ifname = "functional test 3 interface", + .msg = "functional test 3 : use one RED configuration,\n" + " increase average queue size to target level,\n" + " dequeue all packets until queue is empty,\n" + " confirm that average queue size is computed correctly while queue is empty\n\n", + .htxt = "q avg before " + "q avg after " + "expected " + "difference % " + "tolerance % " + "result " + "\n", + .tconfig = &ft3_tconfig, + .tqueue = &ft_tqueue, + .tvar = &ft_tvar, + .tlevel = ft3_tlevel, +}; + +static enum test_result func_test3(struct test_config *tcfg) +{ + enum test_result result = PASS; + uint32_t i = 0; + + printf("%s", tcfg->msg); + + if (test_rte_red_init(tcfg) != PASS) { + result = FAIL; + goto out; + } + + rte_red_rt_data_init(tcfg->tqueue->rdata); + + if (increase_actual_qsize(tcfg->tconfig->rconfig, + tcfg->tqueue->rdata, + tcfg->tqueue->q, + *tcfg->tlevel, + tcfg->tqueue->q_ramp_up) != 0) { + result = FAIL; + goto out; 
+ } + + if (increase_average_qsize(tcfg->tconfig->rconfig, + tcfg->tqueue->rdata, + tcfg->tqueue->q, + *tcfg->tlevel, + tcfg->tqueue->avg_ramp_up) != 0) { + result = FAIL; + goto out; + } + + printf("%s", tcfg->htxt); + + for (i = 0; i < tcfg->tvar->num_iterations; i++) { + double avg_before = 0; + double avg_after = 0; + double exp_avg = 0; + double diff = 0.0; + + avg_before = rte_red_get_avg_float(tcfg->tconfig->rconfig, tcfg->tqueue->rdata); + + /** + * empty the queue + */ + *tcfg->tqueue->q = 0; + rte_red_mark_queue_empty(tcfg->tqueue->rdata, get_port_ts()); + + rte_delay_us(tcfg->tvar->wait_usec); + + /** + * enqueue one packet to recalculate average queue size + */ + if (rte_red_enqueue(tcfg->tconfig->rconfig, + tcfg->tqueue->rdata, + *tcfg->tqueue->q, + get_port_ts()) == 0) { + (*tcfg->tqueue->q)++; + } else { + printf("%s:%d: packet enqueued on empty queue was dropped\n", __func__, __LINE__); + result = FAIL; + } + + exp_avg = calc_exp_avg_on_empty(avg_before, + (1 << *tcfg->tconfig->wq_log2), + tcfg->tvar->wait_usec); + avg_after = rte_red_get_avg_float(tcfg->tconfig->rconfig, + tcfg->tqueue->rdata); + if (!check_avg(&diff, avg_after, exp_avg, (double)tcfg->tqueue->avg_tolerance)) + result = FAIL; + + printf("%-15.4lf%-15.4lf%-15.4lf%-15.4lf%-15.4lf%-15s\n", + avg_before, avg_after, exp_avg, diff, + (double)tcfg->tqueue->avg_tolerance, + diff <= (double)tcfg->tqueue->avg_tolerance ? "pass" : "fail"); + } +out: + return (result); +} + +/** + * Test F4: functional test 4 + */ +static uint32_t ft4_tlevel[] = {1022}; +static uint8_t ft4_wq_log2[] = {11}; + +static struct test_rte_red_config ft4_tconfig = { + .rconfig = ft_wrconfig, + .num_cfg = DIM(ft_wrconfig), + .min_th = 32, + .max_th = 1023, + .wq_log2 = ft4_wq_log2, + .maxp_inv = ft_maxp_inv, +}; + +static struct test_queue ft4_tqueue = { + .rdata = ft_rtdata, + .num_queues = DIM(ft_rtdata), + .qconfig = ft_qconfig, + .q = ft_q, + .q_ramp_up = 1000000, + .avg_ramp_up = 1000000, + .avg_tolerance = 0, /* 0 percent */ + .drop_tolerance = 50, /* 50 percent */ +}; + +static struct test_config func_test4_config = { + .ifname = "functional test 4 interface", + .msg = "functional test 4 : use one RED configuration,\n" + " increase average queue size to target level,\n" + " dequeue all packets until queue is empty,\n" + " confirm that average queue size is computed correctly while\n" + " queue is empty for more than 50 sec,\n" + " (this test takes 52 sec to run)\n\n", + .htxt = "q avg before " + "q avg after " + "expected " + "difference % " + "tolerance % " + "result " + "\n", + .tconfig = &ft4_tconfig, + .tqueue = &ft4_tqueue, + .tvar = &ft_tvar, + .tlevel = ft4_tlevel, +}; + +static enum test_result func_test4(struct test_config *tcfg) +{ + enum test_result result = PASS; + uint64_t time_diff = 0; + uint64_t start = 0; + double avg_before = 0.0; + double avg_after = 0.0; + double exp_avg = 0.0; + double diff = 0.0; + + printf("%s", tcfg->msg); + + if (test_rte_red_init(tcfg) != PASS) { + result = FAIL; + goto out; + } + + rte_red_rt_data_init(tcfg->tqueue->rdata); + + if (increase_actual_qsize(tcfg->tconfig->rconfig, + tcfg->tqueue->rdata, + tcfg->tqueue->q, + *tcfg->tlevel, + tcfg->tqueue->q_ramp_up) != 0) { + result = FAIL; + goto out; + } + + if (increase_average_qsize(tcfg->tconfig->rconfig, + tcfg->tqueue->rdata, + tcfg->tqueue->q, + *tcfg->tlevel, + tcfg->tqueue->avg_ramp_up) != 0) { + result = FAIL; + goto out; + } + + printf("%s", tcfg->htxt); + + avg_before = rte_red_get_avg_float(tcfg->tconfig->rconfig, 
tcfg->tqueue->rdata); + + /** + * empty the queue + */ + *tcfg->tqueue->q = 0; + rte_red_mark_queue_empty(tcfg->tqueue->rdata, get_port_ts()); + + /** + * record empty time locally + */ + start = rte_rdtsc(); + + sleep(tcfg->tvar->sleep_sec); + + /** + * enqueue one packet to recalculate average queue size + */ + if (rte_red_enqueue(tcfg->tconfig->rconfig, + tcfg->tqueue->rdata, + *tcfg->tqueue->q, + get_port_ts()) != 0) { + result = FAIL; + goto out; + } + (*tcfg->tqueue->q)++; + + /** + * calculate how long queue has been empty + */ + time_diff = ((rte_rdtsc() - start) / tcfg->tvar->clk_freq) + * MSEC_PER_SEC; + if (time_diff < MAX_QEMPTY_TIME_MSEC) { + /** + * this could happen if sleep was interrupted for some reason + */ + result = FAIL; + goto out; + } + + /** + * confirm that average queue size is now at expected level + */ + exp_avg = 0.0; + avg_after = rte_red_get_avg_float(tcfg->tconfig->rconfig, tcfg->tqueue->rdata); + if (!check_avg(&diff, avg_after, exp_avg, (double)tcfg->tqueue->avg_tolerance)) + result = FAIL; + + printf("%-15.4lf%-15.4lf%-15.4lf%-15.4lf%-15.4lf%-15s\n", + avg_before, avg_after, exp_avg, + diff, (double)tcfg->tqueue->avg_tolerance, + diff <= (double)tcfg->tqueue->avg_tolerance ? "pass" : "fail"); +out: + return (result); +} + +/** + * Test F5: functional test 5 + */ +static uint32_t ft5_tlevel[] = {127}; +static uint8_t ft5_wq_log2[] = {9, 8}; +static uint8_t ft5_maxp_inv[] = {10, 20}; +static struct rte_red_config ft5_config[2]; +static struct rte_red ft5_data[4]; +static uint32_t ft5_q[4]; +static uint32_t ft5_dropped[] = {0, 0, 0, 0}; +static uint32_t ft5_enqueued[] = {0, 0, 0, 0}; + +static struct test_rte_red_config ft5_tconfig = { + .rconfig = ft5_config, + .num_cfg = DIM(ft5_config), + .min_th = 32, + .max_th = 128, + .wq_log2 = ft5_wq_log2, + .maxp_inv = ft5_maxp_inv, +}; + +static struct test_queue ft5_tqueue = { + .rdata = ft5_data, + .num_queues = DIM(ft5_data), + .qconfig = ft_qconfig, + .q = ft5_q, + .q_ramp_up = 1000000, + .avg_ramp_up = 1000000, + .avg_tolerance = 5, /* 10 percent */ + .drop_tolerance = 50, /* 50 percent */ +}; + +struct test_var ft5_tvar = { + .wait_usec = 0, + .num_iterations = 15, + .num_ops = 10000, + .clk_freq = 0, + .dropped = ft5_dropped, + .enqueued = ft5_enqueued, + .sleep_sec = 0, +}; + +static struct test_config func_test5_config = { + .ifname = "functional test 5 interface", + .msg = "functional test 5 : use several queues (each with its own run-time data),\n" + " use several RED configurations (such that each configuration is shared by multiple queues),\n" + " increase average queue size to just below maximum threshold,\n" + " compare drop rate to drop probability,\n" + " (this is a larger scale version of functional test 2)\n\n", + .htxt = "queue " + "config " + "avg queue size " + "min threshold " + "max threshold " + "drop prob % " + "drop rate % " + "diff % " + "tolerance % " + "\n", + .tconfig = &ft5_tconfig, + .tqueue = &ft5_tqueue, + .tvar = &ft5_tvar, + .tlevel = ft5_tlevel, +}; + +static enum test_result func_test5(struct test_config *tcfg) +{ + enum test_result result = PASS; + uint32_t j = 0; + + printf("%s", tcfg->msg); + + if (test_rte_red_init(tcfg) != PASS) { + result = FAIL; + goto out; + } + + printf("%s", tcfg->htxt); + + for (j = 0; j < tcfg->tqueue->num_queues; j++) { + rte_red_rt_data_init(&tcfg->tqueue->rdata[j]); + tcfg->tqueue->q[j] = 0; + + if (increase_actual_qsize(&tcfg->tconfig->rconfig[tcfg->tqueue->qconfig[j]], + &tcfg->tqueue->rdata[j], + &tcfg->tqueue->q[j], + *tcfg->tlevel, + 
				tcfg->tqueue->q_ramp_up) != 0) {
+			result = FAIL;
+			goto out;
+		}
+
+		if (increase_average_qsize(&tcfg->tconfig->rconfig[tcfg->tqueue->qconfig[j]],
+					&tcfg->tqueue->rdata[j],
+					&tcfg->tqueue->q[j],
+					*tcfg->tlevel,
+					tcfg->tqueue->avg_ramp_up) != 0) {
+			result = FAIL;
+			goto out;
+		}
+	}
+
+	for (j = 0; j < tcfg->tqueue->num_queues; j++) {
+		uint32_t avg = 0;
+		double drop_rate = 0.0;
+		double drop_prob = 0.0;
+		double diff = 0.0;
+
+		tcfg->tvar->dropped[j] = 0;
+		tcfg->tvar->enqueued[j] = 0;
+
+		enqueue_dequeue_func(&tcfg->tconfig->rconfig[tcfg->tqueue->qconfig[j]],
+				&tcfg->tqueue->rdata[j],
+				&tcfg->tqueue->q[j],
+				tcfg->tvar->num_ops,
+				&tcfg->tvar->enqueued[j],
+				&tcfg->tvar->dropped[j]);
+
+		avg = rte_red_get_avg_int(&tcfg->tconfig->rconfig[tcfg->tqueue->qconfig[j]],
+					&tcfg->tqueue->rdata[j]);
+		if (avg != *tcfg->tlevel)
+			result = FAIL;
+
+		drop_rate = calc_drop_rate(tcfg->tvar->enqueued[j], tcfg->tvar->dropped[j]);
+		drop_prob = calc_drop_prob(tcfg->tconfig->min_th, tcfg->tconfig->max_th,
+					tcfg->tconfig->maxp_inv[tcfg->tqueue->qconfig[j]],
+					*tcfg->tlevel);
+		if (!check_drop_rate(&diff, drop_rate, drop_prob, (double)tcfg->tqueue->drop_tolerance))
+			result = FAIL;
+
+		printf("%-15u%-15u%-15u%-15u%-15u%-15.4lf%-15.4lf%-15.4lf%-15.4lf\n",
+			j, tcfg->tqueue->qconfig[j], avg,
+			tcfg->tconfig->min_th, tcfg->tconfig->max_th,
+			drop_prob * 100.0, drop_rate * 100.0,
+			diff, (double)tcfg->tqueue->drop_tolerance);
+	}
+out:
+	return (result);
+}
+
+/**
+ * Test F6: functional test 6
+ */
+static uint32_t ft6_tlevel[] = {1022};
+static uint8_t ft6_wq_log2[] = {9, 8};
+static uint8_t ft6_maxp_inv[] = {10, 20};
+static struct rte_red_config ft6_config[2];
+static struct rte_red ft6_data[4];
+static uint32_t ft6_q[4];
+
+static struct test_rte_red_config ft6_tconfig = {
+	.rconfig = ft6_config,
+	.num_cfg = DIM(ft6_config),
+	.min_th = 32,
+	.max_th = 1023,
+	.wq_log2 = ft6_wq_log2,
+	.maxp_inv = ft6_maxp_inv,
+};
+
+static struct test_queue ft6_tqueue = {
+	.rdata = ft6_data,
+	.num_queues = DIM(ft6_data),
+	.qconfig = ft_qconfig,
+	.q = ft6_q,
+	.q_ramp_up = 1000000,
+	.avg_ramp_up = 1000000,
+	.avg_tolerance = 5, /* 5 percent */
+	.drop_tolerance = 50, /* 50 percent */
+};
+
+static struct test_config func_test6_config = {
+	.ifname = "functional test 6 interface",
+	.msg = "functional test 6 : use several queues (each with its own run-time data),\n"
+	"		use several RED configurations (such that each configuration is shared by multiple queues),\n"
+	"		increase average queue size to target level,\n"
+	"		dequeue all packets until queue is empty,\n"
+	"		confirm that average queue size is computed correctly while queue is empty\n"
+	"		(this is a larger scale version of functional test 3)\n\n",
+	.htxt = "queue          "
+	"config         "
+	"q avg before   "
+	"q avg after    "
+	"expected       "
+	"difference %   "
+	"tolerance %    "
+	"result         ""\n",
+	.tconfig = &ft6_tconfig,
+	.tqueue = &ft6_tqueue,
+	.tvar = &ft_tvar,
+	.tlevel = ft6_tlevel,
+};
+
+static enum test_result func_test6(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	uint32_t j = 0;
+
+	printf("%s", tcfg->msg);
+	if (test_rte_red_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+	printf("%s", tcfg->htxt);
+
+	for (j = 0; j < tcfg->tqueue->num_queues; j++) {
+		rte_red_rt_data_init(&tcfg->tqueue->rdata[j]);
+		tcfg->tqueue->q[j] = 0;
+
+		if (increase_actual_qsize(&tcfg->tconfig->rconfig[tcfg->tqueue->qconfig[j]],
+					&tcfg->tqueue->rdata[j],
+					&tcfg->tqueue->q[j],
+					*tcfg->tlevel,
+					tcfg->tqueue->q_ramp_up) != 0) {
+			result = FAIL;
goto out; + } + if (increase_average_qsize(&tcfg->tconfig->rconfig[tcfg->tqueue->qconfig[j]], + &tcfg->tqueue->rdata[j], + &tcfg->tqueue->q[j], + *tcfg->tlevel, + tcfg->tqueue->avg_ramp_up) != 0) { + result = FAIL; + goto out; + } + } + for (j = 0; j < tcfg->tqueue->num_queues; j++) { + double avg_before = 0; + double avg_after = 0; + double exp_avg = 0; + double diff = 0.0; + + avg_before = rte_red_get_avg_float(&tcfg->tconfig->rconfig[tcfg->tqueue->qconfig[j]], + &tcfg->tqueue->rdata[j]); + + /** + * empty the queue + */ + tcfg->tqueue->q[j] = 0; + rte_red_mark_queue_empty(&tcfg->tqueue->rdata[j], get_port_ts()); + rte_delay_us(tcfg->tvar->wait_usec); + + /** + * enqueue one packet to recalculate average queue size + */ + if (rte_red_enqueue(&tcfg->tconfig->rconfig[tcfg->tqueue->qconfig[j]], + &tcfg->tqueue->rdata[j], + tcfg->tqueue->q[j], + get_port_ts()) == 0) { + tcfg->tqueue->q[j]++; + } else { + printf("%s:%d: packet enqueued on empty queue was dropped\n", __func__, __LINE__); + result = FAIL; + } + + exp_avg = calc_exp_avg_on_empty(avg_before, + (1 << tcfg->tconfig->wq_log2[tcfg->tqueue->qconfig[j]]), + tcfg->tvar->wait_usec); + avg_after = rte_red_get_avg_float(&tcfg->tconfig->rconfig[tcfg->tqueue->qconfig[j]], + &tcfg->tqueue->rdata[j]); + if (!check_avg(&diff, avg_after, exp_avg, (double)tcfg->tqueue->avg_tolerance)) + result = FAIL; + + printf("%-15u%-15u%-15.4lf%-15.4lf%-15.4lf%-15.4lf%-15.4lf%-15s\n", + j, tcfg->tqueue->qconfig[j], avg_before, avg_after, + exp_avg, diff, (double)tcfg->tqueue->avg_tolerance, + diff <= tcfg->tqueue->avg_tolerance ? "pass" : "fail"); + } +out: + return (result); +} + +/** + * setup default values for the performance test structures + */ +static struct rte_red_config pt_wrconfig[1]; +static struct rte_red pt_rtdata[1]; +static uint8_t pt_wq_log2[] = {9}; +static uint8_t pt_maxp_inv[] = {10}; +static uint32_t pt_qconfig[] = {0}; +static uint32_t pt_q[] = {0}; +static uint32_t pt_dropped[] = {0}; +static uint32_t pt_enqueued[] = {0}; + +static struct test_rte_red_config pt_tconfig = { + .rconfig = pt_wrconfig, + .num_cfg = DIM(pt_wrconfig), + .wq_log2 = pt_wq_log2, + .min_th = 32, + .max_th = 128, + .maxp_inv = pt_maxp_inv, +}; + +static struct test_queue pt_tqueue = { + .rdata = pt_rtdata, + .num_queues = DIM(pt_rtdata), + .qconfig = pt_qconfig, + .q = pt_q, + .q_ramp_up = 1000000, + .avg_ramp_up = 1000000, + .avg_tolerance = 5, /* 10 percent */ + .drop_tolerance = 50, /* 50 percent */ +}; + +/** + * enqueue/dequeue packets + */ +static void enqueue_dequeue_perf(struct rte_red_config *red_cfg, + struct rte_red *red, + uint32_t *q, + uint32_t num_ops, + uint32_t *enqueued, + uint32_t *dropped, + struct rdtsc_prof *prof) +{ + uint32_t i = 0; + + for (i = 0; i < num_ops; i++) { + uint64_t ts = 0; + int ret = 0; + /** + * enqueue + */ + ts = get_port_ts(); + rdtsc_prof_start(prof); + ret = rte_red_enqueue(red_cfg, red, *q, ts ); + rdtsc_prof_end(prof); + if (ret == 0) + (*enqueued)++; + else + (*dropped)++; + } +} + +/** + * Setup test structures for tests P1, P2, P3 + * performance tests 1, 2 and 3 + */ +static uint32_t pt1_tlevel[] = {16}; +static uint32_t pt2_tlevel[] = {80}; +static uint32_t pt3_tlevel[] = {144}; + +static struct test_var perf1_tvar = { + .wait_usec = 0, + .num_iterations = 15, + .num_ops = 50000000, + .clk_freq = 0, + .dropped = pt_dropped, + .enqueued = pt_enqueued, + .sleep_sec = 0 +}; + +static struct test_config perf1_test1_config = { + .ifname = "performance test 1 interface", + .msg = "performance test 1 : use one RED 
configuration,\n" + " set actual and average queue sizes to level below min threshold,\n" + " measure enqueue performance\n\n", + .tconfig = &pt_tconfig, + .tqueue = &pt_tqueue, + .tvar = &perf1_tvar, + .tlevel = pt1_tlevel, +}; + +static struct test_config perf1_test2_config = { + .ifname = "performance test 2 interface", + .msg = "performance test 2 : use one RED configuration,\n" + " set actual and average queue sizes to level in between min and max thresholds,\n" + " measure enqueue performance\n\n", + .tconfig = &pt_tconfig, + .tqueue = &pt_tqueue, + .tvar = &perf1_tvar, + .tlevel = pt2_tlevel, +}; + +static struct test_config perf1_test3_config = { + .ifname = "performance test 3 interface", + .msg = "performance test 3 : use one RED configuration,\n" + " set actual and average queue sizes to level above max threshold,\n" + " measure enqueue performance\n\n", + .tconfig = &pt_tconfig, + .tqueue = &pt_tqueue, + .tvar = &perf1_tvar, + .tlevel = pt3_tlevel, +}; + +/** + * Performance test function to measure enqueue performance. + * This runs performance tests 1, 2 and 3 + */ +static enum test_result perf1_test(struct test_config *tcfg) +{ + enum test_result result = PASS; + struct rdtsc_prof prof = {0, 0, 0, 0, 0.0, NULL}; + uint32_t total = 0; + + printf("%s", tcfg->msg); + + rdtsc_prof_init(&prof, "enqueue"); + + if (test_rte_red_init(tcfg) != PASS) { + result = FAIL; + goto out; + } + + /** + * set average queue size to target level + */ + *tcfg->tqueue->q = *tcfg->tlevel; + + /** + * initialize the rte_red run time data structure + */ + rte_red_rt_data_init(tcfg->tqueue->rdata); + + /** + * set the queue average + */ + rte_red_set_avg_int(tcfg->tconfig->rconfig, tcfg->tqueue->rdata, *tcfg->tlevel); + if (rte_red_get_avg_int(tcfg->tconfig->rconfig, tcfg->tqueue->rdata) + != *tcfg->tlevel) { + result = FAIL; + goto out; + } + + enqueue_dequeue_perf(tcfg->tconfig->rconfig, + tcfg->tqueue->rdata, + tcfg->tqueue->q, + tcfg->tvar->num_ops, + tcfg->tvar->enqueued, + tcfg->tvar->dropped, + &prof); + + total = *tcfg->tvar->enqueued + *tcfg->tvar->dropped; + + printf("\ntotal: %u, enqueued: %u (%.2lf%%), dropped: %u (%.2lf%%)\n", total, + *tcfg->tvar->enqueued, ((double)(*tcfg->tvar->enqueued) / (double)total) * 100.0, + *tcfg->tvar->dropped, ((double)(*tcfg->tvar->dropped) / (double)total) * 100.0); + + rdtsc_prof_print(&prof); +out: + return (result); +} + +/** + * Setup test structures for tests P4, P5, P6 + * performance tests 4, 5 and 6 + */ +static uint32_t pt4_tlevel[] = {16}; +static uint32_t pt5_tlevel[] = {80}; +static uint32_t pt6_tlevel[] = {144}; + +static struct test_var perf2_tvar = { + .wait_usec = 500, + .num_iterations = 10000, + .num_ops = 10000, + .dropped = pt_dropped, + .enqueued = pt_enqueued, + .sleep_sec = 0 +}; + +static struct test_config perf2_test4_config = { + .ifname = "performance test 4 interface", + .msg = "performance test 4 : use one RED configuration,\n" + " set actual and average queue sizes to level below min threshold,\n" + " dequeue all packets until queue is empty,\n" + " measure enqueue performance when queue is empty\n\n", + .htxt = "iteration " + "q avg before " + "q avg after " + "expected " + "difference % " + "tolerance % " + "result ""\n", + .tconfig = &pt_tconfig, + .tqueue = &pt_tqueue, + .tvar = &perf2_tvar, + .tlevel = pt4_tlevel, +}; + +static struct test_config perf2_test5_config = { + .ifname = "performance test 5 interface", + .msg = "performance test 5 : use one RED configuration,\n" + " set actual and average queue sizes to level in 
between min and max thresholds,\n"
+	"		set actual and average queue sizes to level in between min and max thresholds,\n"
+	"		dequeue all packets until queue is empty,\n"
+	"		measure enqueue performance when queue is empty\n\n",
+	.htxt = "iteration      "
+	"q avg before   "
+	"q avg after    "
+	"expected       "
+	"difference %   "
+	"tolerance %    "
+	"result         ""\n",
+	.tconfig = &pt_tconfig,
+	.tqueue = &pt_tqueue,
+	.tvar = &perf2_tvar,
+	.tlevel = pt5_tlevel,
+};
+
+static struct test_config perf2_test6_config = {
+	.ifname = "performance test 6 interface",
+	.msg = "performance test 6 : use one RED configuration,\n"
+	"		set actual and average queue sizes to level above max threshold,\n"
+	"		dequeue all packets until queue is empty,\n"
+	"		measure enqueue performance when queue is empty\n\n",
+	.htxt = "iteration      "
+	"q avg before   "
+	"q avg after    "
+	"expected       "
+	"difference %   "
+	"tolerance %    "
+	"result         ""\n",
+	.tconfig = &pt_tconfig,
+	.tqueue = &pt_tqueue,
+	.tvar = &perf2_tvar,
+	.tlevel = pt6_tlevel,
+};
+
+/**
+ * Performance test function to measure enqueue performance when the
+ * queue is empty. This runs performance tests 4, 5 and 6
+ */
+static enum test_result perf2_test(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	struct rdtsc_prof prof = {0, 0, 0, 0, 0.0, NULL};
+	uint32_t total = 0;
+	uint32_t i = 0;
+
+	printf("%s", tcfg->msg);
+
+	rdtsc_prof_init(&prof, "enqueue");
+
+	if (test_rte_red_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	printf("%s", tcfg->htxt);
+
+	for (i = 0; i < tcfg->tvar->num_iterations; i++) {
+		uint32_t count = 0;
+		uint64_t ts = 0;
+		double avg_before = 0;
+		int ret = 0;
+
+		/**
+		 * set average queue size to target level
+		 */
+		*tcfg->tqueue->q = *tcfg->tlevel;
+		count = (*tcfg->tqueue->rdata).count;
+
+		/**
+		 * initialize the rte_red run time data structure
+		 */
+		rte_red_rt_data_init(tcfg->tqueue->rdata);
+		(*tcfg->tqueue->rdata).count = count;
+
+		/**
+		 * set the queue average
+		 */
+		rte_red_set_avg_int(tcfg->tconfig->rconfig, tcfg->tqueue->rdata, *tcfg->tlevel);
+		avg_before = rte_red_get_avg_float(tcfg->tconfig->rconfig, tcfg->tqueue->rdata);
+		if ((avg_before < *tcfg->tlevel) || (avg_before > *tcfg->tlevel)) {
+			result = FAIL;
+			goto out;
+		}
+
+		/**
+		 * empty the queue
+		 */
+		*tcfg->tqueue->q = 0;
+		rte_red_mark_queue_empty(tcfg->tqueue->rdata, get_port_ts());
+
+		/**
+		 * wait for specified period of time
+		 */
+		rte_delay_us(tcfg->tvar->wait_usec);
+
+		/**
+		 * measure performance of enqueue operation while queue is empty
+		 */
+		ts = get_port_ts();
+		rdtsc_prof_start(&prof);
+		ret = rte_red_enqueue(tcfg->tconfig->rconfig, tcfg->tqueue->rdata,
+				*tcfg->tqueue->q, ts);
+		rdtsc_prof_end(&prof);
+
+		/**
+		 * gather enqueued/dropped statistics
+		 */
+		if (ret == 0)
+			(*tcfg->tvar->enqueued)++;
+		else
+			(*tcfg->tvar->dropped)++;
+
+		/**
+		 * on first and last iteration, confirm that
+		 * average queue size was computed correctly
+		 */
+		if ((i == 0) || (i == tcfg->tvar->num_iterations - 1)) {
+			double avg_after = 0;
+			double exp_avg = 0;
+			double diff = 0.0;
+			int ok = 0;
+
+			avg_after = rte_red_get_avg_float(tcfg->tconfig->rconfig, tcfg->tqueue->rdata);
+			exp_avg = calc_exp_avg_on_empty(avg_before,
+					(1 << *tcfg->tconfig->wq_log2),
+					tcfg->tvar->wait_usec);
+			if (check_avg(&diff, avg_after, exp_avg, (double)tcfg->tqueue->avg_tolerance))
+				ok = 1;
+			printf("%-15u%-15.4lf%-15.4lf%-15.4lf%-15.4lf%-15.4lf%-15s\n",
+				i, avg_before, avg_after, exp_avg, diff,
+				(double)tcfg->tqueue->avg_tolerance, ok ? "pass" : "fail");
+			if (!ok) {
+				result = FAIL;
+				goto out;
+			}
+		}
+	}
+	total = *tcfg->tvar->enqueued + *tcfg->tvar->dropped;
+	printf("\ntotal: %u, enqueued: %u (%.2lf%%), dropped: %u (%.2lf%%)\n", total,
+		*tcfg->tvar->enqueued, ((double)(*tcfg->tvar->enqueued) / (double)total) * 100.0,
+		*tcfg->tvar->dropped, ((double)(*tcfg->tvar->dropped) / (double)total) * 100.0);
+
+	rdtsc_prof_print(&prof);
+out:
+	return (result);
+}
+
+/**
+ * setup default values for overflow test structures
+ */
+static uint32_t avg_max = 0;
+static uint32_t avg_max_bits = 0;
+
+static struct rte_red_config ovfl_wrconfig[1];
+static struct rte_red ovfl_rtdata[1];
+static uint8_t ovfl_maxp_inv[] = {10};
+static uint32_t ovfl_qconfig[] = {0, 0, 1, 1};
+static uint32_t ovfl_q[] = {0};
+static uint32_t ovfl_dropped[] = {0};
+static uint32_t ovfl_enqueued[] = {0};
+static uint32_t ovfl_tlevel[] = {1023};
+static uint8_t ovfl_wq_log2[] = {12};
+
+static struct test_rte_red_config ovfl_tconfig = {
+	.rconfig = ovfl_wrconfig,
+	.num_cfg = DIM(ovfl_wrconfig),
+	.wq_log2 = ovfl_wq_log2,
+	.min_th = 32,
+	.max_th = 1023,
+	.maxp_inv = ovfl_maxp_inv,
+};
+
+static struct test_queue ovfl_tqueue = {
+	.rdata = ovfl_rtdata,
+	.num_queues = DIM(ovfl_rtdata),
+	.qconfig = ovfl_qconfig,
+	.q = ovfl_q,
+	.q_ramp_up = 1000000,
+	.avg_ramp_up = 1000000,
+	.avg_tolerance = 5, /* 5 percent */
+	.drop_tolerance = 50, /* 50 percent */
+};
+
+static struct test_var ovfl_tvar = {
+	.wait_usec = 10000,
+	.num_iterations = 1,
+	.num_ops = 10000,
+	.clk_freq = 0,
+	.dropped = ovfl_dropped,
+	.enqueued = ovfl_enqueued,
+	.sleep_sec = 0
+};
+
+static void ovfl_check_avg(uint32_t avg)
+{
+	if (avg > avg_max) {
+		double avg_log = 0;
+		uint32_t bits = 0;
+		avg_max = avg;
+		avg_log = log(((double)avg_max));
+		avg_log = avg_log / log(2.0);
+		bits = (uint32_t)ceil(avg_log);
+		if (bits > avg_max_bits)
+			avg_max_bits = bits;
+	}
+}
+
+static struct test_config ovfl_test1_config = {
+	.ifname = "queue average overflow test interface",
+	.msg = "overflow test 1 : use one RED configuration,\n"
+	"		increase average queue size to target level,\n"
+	"		check maximum number of bits required to represent avg_s\n\n",
+	.htxt = "avg queue size  "
+	"wq_log2  "
+	"fraction bits  "
+	"max queue avg "
+	"num bits  "
+	"enqueued  "
+	"dropped   "
+	"drop prob %  "
+	"drop rate %  "
+	"\n",
+	.tconfig = &ovfl_tconfig,
+	.tqueue = &ovfl_tqueue,
+	.tvar = &ovfl_tvar,
+	.tlevel = ovfl_tlevel,
+};
+
+static enum test_result ovfl_test1(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	uint32_t avg = 0;
+	uint32_t i = 0;
+	double drop_rate = 0.0;
+	double drop_prob = 0.0;
+	double diff = 0.0;
+	int ret = 0;
+
+	printf("%s", tcfg->msg);
+
+	if (test_rte_red_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	/**
+	 * reset rte_red run-time data
+	 */
+	rte_red_rt_data_init(tcfg->tqueue->rdata);
+
+	/**
+	 * increase actual queue size
+	 */
+	for (i = 0; i < tcfg->tqueue->q_ramp_up; i++) {
+		ret = rte_red_enqueue(tcfg->tconfig->rconfig, tcfg->tqueue->rdata,
+				*tcfg->tqueue->q, get_port_ts());
+
+		if (ret == 0) {
+			if (++(*tcfg->tqueue->q) >= *tcfg->tlevel)
+				break;
+		}
+	}
+
+	/**
+	 * enqueue
+	 */
+	for (i = 0; i < tcfg->tqueue->avg_ramp_up; i++) {
+		ret = rte_red_enqueue(tcfg->tconfig->rconfig, tcfg->tqueue->rdata,
+				*tcfg->tqueue->q, get_port_ts());
+		ovfl_check_avg((*tcfg->tqueue->rdata).avg);
+		avg = rte_red_get_avg_int(tcfg->tconfig->rconfig, tcfg->tqueue->rdata);
+		if (avg == *tcfg->tlevel) {
+			if (ret == 0)
+				(*tcfg->tvar->enqueued)++;
+			else
+				(*tcfg->tvar->dropped)++;
+		}
+	}
+
+	/**
+	 * check if target average queue size has been reached
+	 */
+	avg = rte_red_get_avg_int(tcfg->tconfig->rconfig, tcfg->tqueue->rdata);
+	if (avg != *tcfg->tlevel) {
+		result = FAIL;
+		goto out;
+	}
+
+	/**
+	 * check drop rate against drop probability
+	 */
+	drop_rate = calc_drop_rate(*tcfg->tvar->enqueued, *tcfg->tvar->dropped);
+	drop_prob = calc_drop_prob(tcfg->tconfig->min_th,
+			tcfg->tconfig->max_th,
+			*tcfg->tconfig->maxp_inv,
+			*tcfg->tlevel);
+	if (!check_drop_rate(&diff, drop_rate, drop_prob, (double)tcfg->tqueue->drop_tolerance))
+		result = FAIL;
+
+	printf("%s", tcfg->htxt);
+
+	printf("%-16u%-9u%-15u0x%08x %-10u%-10u%-10u%-13.2lf%-13.2lf\n",
+		avg, *tcfg->tconfig->wq_log2, RTE_RED_SCALING,
+		avg_max, avg_max_bits,
+		*tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+		drop_prob * 100.0, drop_rate * 100.0);
+out:
+	return (result);
+}
+
+/**
+ * define the functional and performance tests to be executed
+ */
+struct tests func_tests[] = {
+	{ &func_test1_config, func_test1 },
+	{ &func_test2_config, func_test2 },
+	{ &func_test3_config, func_test3 },
+	{ &func_test4_config, func_test4 },
+	{ &func_test5_config, func_test5 },
+	{ &func_test6_config, func_test6 },
+	{ &ovfl_test1_config, ovfl_test1 },
+};
+
+struct tests perf_tests[] = {
+	{ &perf1_test1_config, perf1_test },
+	{ &perf1_test2_config, perf1_test },
+	{ &perf1_test3_config, perf1_test },
+	{ &perf2_test4_config, perf2_test },
+	{ &perf2_test5_config, perf2_test },
+	{ &perf2_test6_config, perf2_test },
+};
+
+/**
+ * function to execute the required tests
+ */
+static void run_tests(struct tests *test_type, uint32_t test_count, uint32_t *num_tests, uint32_t *num_pass)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+
+	for (i = 0; i < test_count; i++) {
+		printf("\n--------------------------------------------------------------------------------\n");
+		result = test_type[i].testfn(test_type[i].testcfg);
+		(*num_tests)++;
+		if (result == PASS) {
+			(*num_pass)++;
+			printf("--------------------------------------------------------------------------<pass>\n");
+		} else {
+			printf("--------------------------------------------------------------------------<fail>\n");
+		}
+	}
+	return;
+}
+
+/**
+ * check if functions accept invalid parameters
+ *
+ * First, all functions will be called without initialized RED
+ * Then, all of them will be called with NULL/invalid parameters
+ *
+ * Some functions are not tested as they are performance-critical and thus
+ * don't do any parameter checking.
+ */
+static int
+test_invalid_parameters(void)
+{
+	struct rte_red_config config;
+
+	if (rte_red_rt_data_init(NULL) == 0) {
+		printf("rte_red_rt_data_init should have failed!\n");
+		return -1;
+	}
+
+	if (rte_red_config_init(NULL, 0, 0, 0, 0) == 0) {
+		printf("rte_red_config_init should have failed!\n");
+		return -1;
+	}
+
+	if (rte_red_rt_data_init(NULL) == 0) {
+		printf("rte_red_rt_data_init should have failed!\n");
+		return -1;
+	}
+
+	/* NULL config */
+	if (rte_red_config_init(NULL, 0, 0, 0, 0) == 0) {
+		printf("%i: rte_red_config_init should have failed!\n", __LINE__);
+		return -1;
+	}
+	/* min_threshold == max_threshold */
+	if (rte_red_config_init(&config, 0, 1, 1, 0) == 0) {
+		printf("%i: rte_red_config_init should have failed!\n", __LINE__);
+		return -1;
+	}
+	/* min_threshold > max_threshold */
+	if (rte_red_config_init(&config, 0, 2, 1, 0) == 0) {
+		printf("%i: rte_red_config_init should have failed!\n", __LINE__);
+		return -1;
+	}
+	/* wq_log2 > RTE_RED_WQ_LOG2_MAX */
+	if (rte_red_config_init(&config,
+			RTE_RED_WQ_LOG2_MAX + 1, 1, 2, 0) == 0) {
+		printf("%i: rte_red_config_init should have failed!\n", __LINE__);
+		return -1;
+	}
+	/* wq_log2 < RTE_RED_WQ_LOG2_MIN */
+	if (rte_red_config_init(&config,
+			RTE_RED_WQ_LOG2_MIN - 1, 1, 2, 0) == 0) {
+		printf("%i: rte_red_config_init should have failed!\n", __LINE__);
+		return -1;
+	}
+	/* maxp_inv > RTE_RED_MAXP_INV_MAX */
+	if (rte_red_config_init(&config,
+			RTE_RED_WQ_LOG2_MIN, 1, 2, RTE_RED_MAXP_INV_MAX + 1) == 0) {
+		printf("%i: rte_red_config_init should have failed!\n", __LINE__);
+		return -1;
+	}
+	/* maxp_inv < RTE_RED_MAXP_INV_MIN */
+	if (rte_red_config_init(&config,
+			RTE_RED_WQ_LOG2_MIN, 1, 2, RTE_RED_MAXP_INV_MIN - 1) == 0) {
+		printf("%i: rte_red_config_init should have failed!\n", __LINE__);
+		return -1;
+	}
+
+	return 0;
+}
+
+int test_red(void)
+{
+	uint32_t num_tests = 0;
+	uint32_t num_pass = 0;
+	int ret = 0;
+
+	if (test_invalid_parameters() < 0)
+		return -1;
+
+	run_tests(func_tests, DIM(func_tests), &num_tests, &num_pass);
+	run_tests(perf_tests, DIM(perf_tests), &num_tests, &num_pass);
+
+	if (num_pass == num_tests) {
+		printf("[total: %u, pass: %u]\n", num_tests, num_pass);
+		ret = 0;
+	} else {
+		printf("[total: %u, pass: %u, fail: %u]\n", num_tests, num_pass, num_tests - num_pass);
+		ret = -1;
+	}
+	return (ret);
+}
+
+#else
+
+int
+test_red(void)
+{
+	printf("The SCHED library is not included in this build\n");
+	return 0;
+}
+
+#endif
diff --git a/app/test/test_sched.c b/app/test/test_sched.c
new file mode 100755
index 0000000000..a0efa52491
--- /dev/null
+++ b/app/test/test_sched.c
@@ -0,0 +1,244 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ *   * Neither the name of Intel Corporation nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <unistd.h>
+
+#include <rte_cycles.h>
+
+#include "test.h"
+
+#if defined(RTE_LIBRTE_SCHED) && defined(RTE_ARCH_X86_64)
+
+#include <rte_mempool.h>
+#include <rte_mbuf.h>
+#include <rte_ether.h>
+#include <rte_ip.h>
+#include <rte_sched.h>
+
+
+#define VERIFY(exp,fmt,args...) \
+	if (!(exp)) { \
+		printf(fmt, ##args); \
+		return -1; \
+	}
+
+
+#define SUBPORT 0
+#define PIPE 1
+#define TC 2
+#define QUEUE 3
+
+static struct rte_sched_subport_params subport_param[] = {
+	{
+		.tb_rate = 1250000000,
+		.tb_size = 1000000,
+
+		.tc_rate = {1250000000, 1250000000, 1250000000, 1250000000},
+		.tc_period = 10,
+	},
+};
+
+static struct rte_sched_pipe_params pipe_profile[] = {
+	{ /* Profile #0 */
+		.tb_rate = 305175,
+		.tb_size = 1000000,
+
+		.tc_rate = {305175, 305175, 305175, 305175},
+		.tc_period = 40,
+
+		.wrr_weights = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+	},
+};
+
+static struct rte_sched_port_params port_param = {
+	.name = "port_0",
+	.socket = 0, /* computed */
+	.rate = 0, /* computed */
+	.frame_overhead = RTE_SCHED_FRAME_OVERHEAD_DEFAULT,
+	.n_subports_per_port = 1,
+	.n_pipes_per_subport = 4096,
+	.qsize = {64, 64, 64, 64},
+	.pipe_profiles = pipe_profile,
+	.n_pipe_profiles = 1,
+};
+
+#define NB_MBUF 32
+#define MAX_PACKET_SZ 2048
+#define MBUF_SZ (MAX_PACKET_SZ + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)
+#define PKT_BURST_SZ 32
+#define MEMPOOL_CACHE_SZ PKT_BURST_SZ
+#define SOCKET 0
+
+
+static struct rte_mempool *
+create_mempool(void)
+{
+	struct rte_mempool *mp;
+
+	mp = rte_mempool_lookup("test_sched");
+	if (!mp)
+		mp = rte_mempool_create("test_sched",
+				NB_MBUF,
+				MBUF_SZ,
+				MEMPOOL_CACHE_SZ,
+				sizeof(struct rte_pktmbuf_pool_private),
+				rte_pktmbuf_pool_init,
+				NULL,
+				rte_pktmbuf_init,
+				NULL,
+				SOCKET,
+				0);
+
+	return mp;
+}
+
+static void
+prepare_pkt(struct rte_mbuf *mbuf)
+{
+	struct ether_hdr *eth_hdr;
+	struct vlan_hdr *vlan1, *vlan2;
+	struct ipv4_hdr *ip_hdr;
+
+	/* Simulate a classifier */
+	eth_hdr = rte_pktmbuf_mtod(mbuf, struct ether_hdr *);
+	vlan1 = (struct vlan_hdr *)(&eth_hdr->ether_type);
+	vlan2 = (struct vlan_hdr *)((uintptr_t)&eth_hdr->ether_type + sizeof(struct vlan_hdr));
+	eth_hdr = (struct ether_hdr *)((uintptr_t)&eth_hdr->ether_type + 2 * sizeof(struct vlan_hdr));
+	ip_hdr = (struct ipv4_hdr *)((uintptr_t)eth_hdr + sizeof(eth_hdr->ether_type));
+
+	vlan1->vlan_tci = rte_cpu_to_be_16(SUBPORT);
+	vlan2->vlan_tci = rte_cpu_to_be_16(PIPE);
+	eth_hdr->ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
+	ip_hdr->dst_addr = IPv4(0,0,TC,QUEUE);
+
+
+	rte_sched_port_pkt_write(mbuf, SUBPORT, PIPE, TC, QUEUE, e_RTE_METER_YELLOW);
+
+	/* 64 byte packet */
+	mbuf->pkt.pkt_len = 60;
+	mbuf->pkt.data_len = 60;
+}
+
+
+/**
+ * test main entrance for library sched
+ */
+int
+test_sched(void)
+{
struct rte_mempool *mp = NULL; + struct rte_sched_port *port = NULL; + uint32_t pipe; + struct rte_mbuf *in_mbufs[10]; + struct rte_mbuf *out_mbufs[10]; + int i; + + int err; + + mp = create_mempool(); + + port_param.socket = 0; + port_param.rate = (uint64_t) 10000 * 1000 * 1000 / 8; + port_param.name = "port_0"; + + port = rte_sched_port_config(&port_param); + VERIFY(port != NULL, "Error config sched port\n"); + + + err = rte_sched_subport_config(port, SUBPORT, subport_param); + VERIFY(err == 0, "Error config sched, err=%d\n", err); + + for (pipe = 0; pipe < port_param.n_pipes_per_subport; pipe ++) { + err = rte_sched_pipe_config(port, SUBPORT, pipe, 0); + VERIFY(err == 0, "Error config sched pipe %u, err=%d\n", pipe, err); + } + + for (i = 0; i < 10; i++) { + in_mbufs[i] = rte_pktmbuf_alloc(mp); + prepare_pkt(in_mbufs[i]); + } + + + err = rte_sched_port_enqueue(port, in_mbufs, 10); + VERIFY(err == 10, "Wrong enqueue, err=%d\n", err); + + err = rte_sched_port_dequeue(port, out_mbufs, 10); + VERIFY(err == 10, "Wrong dequeue, err=%d\n", err); + + for (i = 0; i < 10; i++) { + enum rte_meter_color color; + uint32_t subport, traffic_class, queue; + + color = rte_sched_port_pkt_read_color(out_mbufs[i]); + VERIFY(color == e_RTE_METER_YELLOW, "Wrong color\n"); + + rte_sched_port_pkt_read_tree_path(out_mbufs[i], + &subport, &pipe, &traffic_class, &queue); + + VERIFY(subport == SUBPORT, "Wrong subport\n"); + VERIFY(pipe == PIPE, "Wrong pipe\n"); + VERIFY(traffic_class == TC, "Wrong traffic_class\n"); + VERIFY(queue == QUEUE, "Wrong queue\n"); + + } + + + struct rte_sched_subport_stats subport_stats; + uint32_t tc_ov; + rte_sched_subport_read_stats(port, SUBPORT, &subport_stats, &tc_ov); + //VERIFY(subport_stats.n_pkts_tc[TC-1] == 10, "Wrong subport stats\n"); + + struct rte_sched_queue_stats queue_stats; + uint16_t qlen; + rte_sched_queue_read_stats(port, QUEUE, &queue_stats, &qlen); + //VERIFY(queue_stats.n_pkts == 10, "Wrong queue stats\n"); + + rte_sched_port_free(port); + + return 0; +} + +#else /* RTE_LIBRTE_SCHED */ + +int +test_sched(void) +{ + printf("The Scheduler library is not included in this build\n"); + return 0; +} +#endif /* RTE_LIBRTE_SCHED */ diff --git a/config/defconfig_i686-default-linuxapp-gcc b/config/defconfig_i686-default-linuxapp-gcc index 5960e85ff5..a63d37a1d0 100644 --- a/config/defconfig_i686-default-linuxapp-gcc +++ b/config/defconfig_i686-default-linuxapp-gcc @@ -233,6 +233,16 @@ CONFIG_RTE_LIBRTE_NET=y # CONFIG_RTE_LIBRTE_METER=y +# +# Compile librte_sched +# +CONFIG_RTE_LIBRTE_SCHED=y +CONFIG_RTE_SCHED_RED=n +CONFIG_RTE_SCHED_COLLECT_STATS=n +CONFIG_RTE_SCHED_SUBPORT_TC_OV=n +CONFIG_RTE_SCHED_PORT_N_GRINDERS=8 +CONFIG_RTE_BITMAP_ARRAY1_SIZE=16 + # # Compile librte_kni # diff --git a/config/defconfig_i686-default-linuxapp-icc b/config/defconfig_i686-default-linuxapp-icc index 6c6a59d9de..cf86ba5b08 100644 --- a/config/defconfig_i686-default-linuxapp-icc +++ b/config/defconfig_i686-default-linuxapp-icc @@ -234,6 +234,16 @@ CONFIG_RTE_LIBRTE_NET=y # CONFIG_RTE_LIBRTE_METER=y +# +# Compile librte_sched +# +CONFIG_RTE_LIBRTE_SCHED=y +CONFIG_RTE_SCHED_RED=n +CONFIG_RTE_SCHED_COLLECT_STATS=n +CONFIG_RTE_SCHED_SUBPORT_TC_OV=n +CONFIG_RTE_SCHED_PORT_N_GRINDERS=8 +CONFIG_RTE_BITMAP_ARRAY1_SIZE=16 + # # Compile librte_kni # diff --git a/config/defconfig_x86_64-default-linuxapp-gcc b/config/defconfig_x86_64-default-linuxapp-gcc index 1dcc8c6486..b5d3362afc 100644 --- a/config/defconfig_x86_64-default-linuxapp-gcc +++ b/config/defconfig_x86_64-default-linuxapp-gcc @@ -235,6 
+235,17 @@ CONFIG_RTE_LIBRTE_NET=y CONFIG_RTE_LIBRTE_METER=y # +# Compile librte_sched +# +CONFIG_RTE_LIBRTE_SCHED=y +CONFIG_RTE_SCHED_RED=n +CONFIG_RTE_SCHED_COLLECT_STATS=n +CONFIG_RTE_SCHED_SUBPORT_TC_OV=n +CONFIG_RTE_SCHED_PORT_N_GRINDERS=8 +CONFIG_RTE_BITMAP_ARRAY1_SIZE=16 + +# +# Compile the test application # Compile librte_kni # CONFIG_RTE_LIBRTE_KNI=y diff --git a/config/defconfig_x86_64-default-linuxapp-icc b/config/defconfig_x86_64-default-linuxapp-icc index 7053a1067e..60f10afd16 100644 --- a/config/defconfig_x86_64-default-linuxapp-icc +++ b/config/defconfig_x86_64-default-linuxapp-icc @@ -234,6 +234,16 @@ CONFIG_RTE_LIBRTE_NET=y # CONFIG_RTE_LIBRTE_METER=y +# +# Compile librte_sched +# +CONFIG_RTE_LIBRTE_SCHED=y +CONFIG_RTE_SCHED_RED=n +CONFIG_RTE_SCHED_COLLECT_STATS=n +CONFIG_RTE_SCHED_SUBPORT_TC_OV=n +CONFIG_RTE_SCHED_PORT_N_GRINDERS=8 +CONFIG_RTE_BITMAP_ARRAY1_SIZE=16 + # # Compile librte_kni # diff --git a/examples/qos_sched/Makefile b/examples/qos_sched/Makefile new file mode 100755 index 0000000000..08f4d191c0 --- /dev/null +++ b/examples/qos_sched/Makefile @@ -0,0 +1,58 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2013 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
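Each defconfig hunk above repeats the same librte_sched switches; by the usual DPDK convention (assumed here, this patch itself does not show the mapping) a CONFIG_FOO=y line surfaces in C as a plain FOO macro, so code can probe the build as in this self-contained sketch:

	#include <stdio.h>

	int
	main(void)
	{
	#ifdef RTE_SCHED_RED
		printf("WRED is compiled into the scheduler enqueue path\n");
	#else
		printf("tail drop only (CONFIG_RTE_SCHED_RED=n)\n");
	#endif
	#ifdef RTE_SCHED_COLLECT_STATS
		printf("queue/subport statistics collection is enabled\n");
	#endif
		return 0;
	}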
+# + +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overriden by command line or environment +RTE_TARGET ?= x86_64-default-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +ifneq ($(CONFIG_RTE_EXEC_ENV),"linuxapp") +$(error This application can only operate in a linuxapp environment, \ +please change the definition of the RTE_TARGET environment variable) +endif + +# binary name +APP = qos_sched + +# all source are stored in SRCS-y +SRCS-y := main.c args.c init.c app_thread.c cfg_file.c + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) + +LDLIBS += -lrte_sched + +include $(RTE_SDK)/mk/rte.extapp.mk diff --git a/examples/qos_sched/app_thread.c b/examples/qos_sched/app_thread.c new file mode 100755 index 0000000000..afce5efe24 --- /dev/null +++ b/examples/qos_sched/app_thread.c @@ -0,0 +1,302 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2013 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "main.h" + +/* + * QoS parameters are encoded as follows: + * Outer VLAN ID defines subport + * Inner VLAN ID defines pipe + * Destination IP 0.0.XXX.0 defines traffic class + * Destination IP host (0.0.0.XXX) defines queue + * Values below define offset to each field from start of frame + */ +#define SUBPORT_OFFSET 7 +#define PIPE_OFFSET 9 +#define TC_OFFSET 20 +#define QUEUE_OFFSET 20 +#define COLOR_OFFSET 19 + +static inline int +get_pkt_sched(struct rte_mbuf *m, uint32_t *subport, uint32_t *pipe, + uint32_t *traffic_class, uint32_t *queue, uint32_t *color) +{ + uint16_t *pdata = rte_pktmbuf_mtod(m, uint16_t *); + + *subport = (rte_be_to_cpu_16(pdata[SUBPORT_OFFSET]) & 0x0FFF) & + (port_params.n_subports_per_port - 1); /* Outer VLAN ID*/ + *pipe = (rte_be_to_cpu_16(pdata[PIPE_OFFSET]) & 0x0FFF) & + (port_params.n_pipes_per_subport - 1); /* Inner VLAN ID */ + *traffic_class = (pdata[QUEUE_OFFSET] & 0x0F) & + (RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE - 1); /* Destination IP */ + *queue = ((pdata[QUEUE_OFFSET] >> 8) & 0x0F) & + (RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS - 1) ; /* Destination IP */ + *color = pdata[COLOR_OFFSET] & 0x03; /* Destination IP */ + + return 0; +} + +void +app_rx_thread(struct thread_conf **confs) +{ + uint32_t i, nb_rx; + struct rte_mbuf *rx_mbufs[burst_conf.rx_burst] __rte_cache_aligned; + struct thread_conf *conf; + int conf_idx = 0; + + uint32_t subport; + uint32_t pipe; + uint32_t traffic_class; + uint32_t queue; + uint32_t color; + + while ((conf = confs[conf_idx])) { + nb_rx = rte_eth_rx_burst(conf->rx_port, conf->rx_queue, rx_mbufs, + burst_conf.rx_burst); + + if (likely(nb_rx != 0)) { + APP_STATS_ADD(conf->stat.nb_rx, nb_rx); + + for(i = 0; i < nb_rx; i++) { + get_pkt_sched(rx_mbufs[i], + &subport, &pipe, &traffic_class, &queue, &color); + rte_sched_port_pkt_write(rx_mbufs[i], subport, pipe, + traffic_class, queue, (enum rte_meter_color) color); + } + + if (unlikely(rte_ring_sp_enqueue_bulk(conf->rx_ring, + (void **)rx_mbufs, nb_rx) != 0)) { + for(i = 0; i < nb_rx; i++) { + rte_pktmbuf_free(rx_mbufs[i]); + + APP_STATS_ADD(conf->stat.nb_drop, 1); + } + } + } + conf_idx++; + if (confs[conf_idx] == NULL) + conf_idx = 0; + } +} + + + +/* Send the packet to an output interface + * For performance reason function returns number of packets dropped, not sent, + * so 0 means that all packets were sent successfully + */ + +static inline void +app_send_burst(struct thread_conf *qconf) +{ + struct rte_mbuf **mbufs; + uint32_t n, ret; + + mbufs = (struct rte_mbuf **)qconf->m_table; + n = qconf->n_mbufs; + + do { + ret = rte_eth_tx_burst(qconf->tx_port, qconf->tx_queue, mbufs, (uint16_t)n); + if (unlikely(ret < n)) { /* we cannot drop the packets, so re-send */ + /* update number of packets to be sent */ + n -= ret; + mbufs = (struct rte_mbuf **)&mbufs[ret]; + /* limit number of retries to avoid endless loop */ + /* reset retry counter if some packets were sent */ + if (likely(ret != 0)) { + continue; + } + } + } while (ret != n); +} + + +/* Send the packet to an output interface */ +static void +app_send_packets(struct thread_conf *qconf, struct rte_mbuf **mbufs, uint32_t nb_pkt) +{ + uint32_t i, len; + + len = qconf->n_mbufs; + for(i = 0; i < nb_pkt; i++) { + qconf->m_table[len] = mbufs[i]; + len++; + /* enough pkts to be sent */ + if (unlikely(len == burst_conf.tx_burst)) { + qconf->n_mbufs = len; + app_send_burst(qconf); + len = 0; + } + } + + 
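+	/* Anything short of a full burst stays buffered in m_table; it is
+	 * flushed either when the table fills up to tx_burst or when the
+	 * calling thread's drain timer (BURST_TX_DRAIN_US) expires. */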
qconf->n_mbufs = len; +} + +void +app_tx_thread(struct thread_conf **confs) +{ + struct rte_mbuf *mbufs[burst_conf.qos_dequeue]; + struct thread_conf *conf; + int conf_idx = 0; + int retval; + const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US; + + while ((conf = confs[conf_idx])) { + retval = rte_ring_sc_dequeue_bulk(conf->tx_ring, (void **)mbufs, + burst_conf.qos_dequeue); + if (likely(retval == 0)) { + app_send_packets(conf, mbufs, burst_conf.qos_dequeue); + + conf->counter = 0; /* reset empty read loop counter */ + } + + conf->counter++; + + /* drain ring and TX queues */ + if (unlikely(conf->counter > drain_tsc)) { + /* now check is there any packets left to be transmitted */ + if (conf->n_mbufs != 0) { + app_send_burst(conf); + + conf->n_mbufs = 0; + } + conf->counter = 0; + } + + conf_idx++; + if (confs[conf_idx] == NULL) + conf_idx = 0; + } +} + + +void +app_worker_thread(struct thread_conf **confs) +{ + struct rte_mbuf *mbufs[burst_conf.ring_burst]; + struct thread_conf *conf; + int conf_idx = 0; + + while ((conf = confs[conf_idx])) { + uint32_t nb_pkt; + int retval; + + /* Read packet from the ring */ + retval = rte_ring_sc_dequeue_bulk(conf->rx_ring, (void **)mbufs, + burst_conf.ring_burst); + if (likely(retval == 0)) { + int nb_sent = rte_sched_port_enqueue(conf->sched_port, mbufs, + burst_conf.ring_burst); + + APP_STATS_ADD(conf->stat.nb_drop, burst_conf.ring_burst - nb_sent); + APP_STATS_ADD(conf->stat.nb_rx, burst_conf.ring_burst); + } + + nb_pkt = rte_sched_port_dequeue(conf->sched_port, mbufs, + burst_conf.qos_dequeue); + if (likely(nb_pkt > 0)) + while (rte_ring_sp_enqueue_bulk(conf->tx_ring, (void **)mbufs, nb_pkt) != 0); + + conf_idx++; + if (confs[conf_idx] == NULL) + conf_idx = 0; + } +} + + +void +app_mixed_thread(struct thread_conf **confs) +{ + struct rte_mbuf *mbufs[burst_conf.ring_burst]; + struct thread_conf *conf; + int conf_idx = 0; + const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US; + + while ((conf = confs[conf_idx])) { + uint32_t nb_pkt; + int retval; + + /* Read packet from the ring */ + retval = rte_ring_sc_dequeue_bulk(conf->rx_ring, (void **)mbufs, + burst_conf.ring_burst); + if (likely(retval == 0)) { + int nb_sent = rte_sched_port_enqueue(conf->sched_port, mbufs, + burst_conf.ring_burst); + + APP_STATS_ADD(conf->stat.nb_drop, burst_conf.ring_burst - nb_sent); + APP_STATS_ADD(conf->stat.nb_rx, burst_conf.ring_burst); + } + + + nb_pkt = rte_sched_port_dequeue(conf->sched_port, mbufs, + burst_conf.qos_dequeue); + if (likely(nb_pkt > 0)) { + app_send_packets(conf, mbufs, nb_pkt); + + conf->counter = 0; /* reset empty read loop counter */ + } + + conf->counter++; + + /* drain ring and TX queues */ + if (unlikely(conf->counter > drain_tsc)) { + + /* now check is there any packets left to be transmitted */ + if (conf->n_mbufs != 0) { + app_send_burst(conf); + + conf->n_mbufs = 0; + } + conf->counter = 0; + } + + conf_idx++; + if (confs[conf_idx] == NULL) + conf_idx = 0; + } +} + + diff --git a/examples/qos_sched/args.c b/examples/qos_sched/args.c new file mode 100755 index 0000000000..c9cfdb24b3 --- /dev/null +++ b/examples/qos_sched/args.c @@ -0,0 +1,467 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2013 Intel Corporation. All rights reserved. + * All rights reserved. 
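The worker and mixed loops above lean on the all-or-nothing contract of the bulk ring calls: a bulk dequeue either delivers exactly the requested count and returns 0, or delivers nothing. A minimal sketch of that pattern under the same assumption, with hypothetical ring arguments and the burst capped by the local array:

	#include <rte_ring.h>

	#define BURST 32

	/* Move one full burst from rx to tx, or nothing at all. */
	static void
	move_burst(struct rte_ring *rx, struct rte_ring *tx)
	{
		void *objs[BURST];

		/* non-zero means fewer than BURST objects were available */
		if (rte_ring_sc_dequeue_bulk(rx, objs, BURST) != 0)
			return;

		/* spin until the TX ring accepts the burst, as the app does */
		while (rte_ring_sp_enqueue_bulk(tx, objs, BURST) != 0)
			;
	}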
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "main.h" + +#define APP_NAME "qos_sched" +#define MAX_OPT_VALUES 8 +#define SYS_CPU_DIR "/sys/devices/system/cpu/cpu%u/topology/" + +static uint32_t app_master_core = 1; +static uint32_t app_numa_mask; +static uint64_t app_used_core_mask = 0; +static uint64_t app_used_port_mask = 0; +static uint64_t app_used_rx_port_mask = 0; +static uint64_t app_used_tx_port_mask = 0; + + +static const char usage[] = + " \n" + " %s \n" + " \n" + "Application mandatory parameters: \n" + " --pfc \"RX PORT, TX PORT, RX LCORE, WT LCORE\" : Packet flow configuration \n" + " multiple pfc can be configured in command line \n" + " \n" + "Application optional parameters: \n" + " --mst I : master core index (default value is %u) \n" + " --rsz \"A, B, C\" : Ring sizes \n" + " A = Size (in number of buffer descriptors) of each of the NIC RX \n" + " rings read by the I/O RX lcores (default value is %u) \n" + " B = Size (in number of elements) of each of the SW rings used by the\n" + " I/O RX lcores to send packets to worker lcores (default value is\n" + " %u) \n" + " C = Size (in number of buffer descriptors) of each of the NIC TX \n" + " rings written by worker lcores (default value is %u) \n" + " --bsz \"A, B, C, D\": Burst sizes \n" + " A = I/O RX lcore read burst size from NIC RX (default value is %u) \n" + " B = I/O RX lcore write burst size to output SW rings, \n" + " Worker lcore read burst size from input SW rings, \n" + " QoS enqueue size (default value is %u) \n" + " C = QoS dequeue size (default value is %u) \n" + " D = Worker lcore write burst size to NIC TX (default value is %u) \n" + " --rth \"A, B, C\" : RX queue threshold parameters \n" + " A = RX prefetch threshold (default value is %u) \n" + " B = RX host threshold (default value is %u) \n" + " C = RX write-back threshold (default value is %u) \n" + " --tth \"A, B, C\" : TX queue threshold parameters \n" + " A = TX 
prefetch threshold (default value is %u) \n" + " B = TX host threshold (default value is %u) \n" + " C = TX write-back threshold (default value is %u) \n" + " --cfg FILE : profile configuration to load \n" +; + +/* display usage */ +static void +app_usage(const char *prgname) +{ + printf(usage, prgname, app_master_core, + APP_RX_DESC_DEFAULT, APP_RING_SIZE, APP_TX_DESC_DEFAULT, + MAX_PKT_RX_BURST, PKT_ENQUEUE, PKT_DEQUEUE, MAX_PKT_TX_BURST, + RX_PTHRESH, RX_HTHRESH, RX_WTHRESH, + TX_PTHRESH, TX_HTHRESH, TX_WTHRESH + ); +} + +static inline int str_is(const char *str, const char *is) +{ + return (strcmp(str, is) == 0); +} + +/* returns core mask used by DPDK */ +static uint64_t +app_eal_core_mask(void) +{ + uint32_t i; + uint64_t cm = 0; + struct rte_config *cfg = rte_eal_get_configuration(); + + for (i = 0; i < RTE_MAX_LCORE; i++) { + if (cfg->lcore_role[i] == ROLE_RTE) + cm |= (1ULL << i); + } + + cm |= (1ULL << cfg->master_lcore); + + return cm; +} + + +/* returns total number of cores presented in a system */ +static uint32_t +app_cpu_core_count(void) +{ + int i, len; + char path[PATH_MAX]; + uint32_t ncores = 0; + + for(i = 0; i < RTE_MAX_LCORE; i++) { + len = rte_snprintf(path, sizeof(path), SYS_CPU_DIR, i); + if (len <= 0 || (unsigned)len >= sizeof(path)) + continue; + + if (access(path, F_OK) == 0) + ncores++; + } + + return ncores; +} + +/* returns: + number of values parsed + -1 in case of error +*/ +static int +app_parse_opt_vals(const char *conf_str, char separator, uint32_t n_vals, uint32_t *opt_vals) +{ + char *string; + uint32_t i, n_tokens; + char *tokens[MAX_OPT_VALUES]; + + if (conf_str == NULL || opt_vals == NULL || n_vals == 0 || n_vals > MAX_OPT_VALUES) + return -1; + + /* duplicate configuration string before splitting it to tokens */ + string = strdup(conf_str); + if (string == NULL) + return -1; + + n_tokens = rte_strsplit(string, strnlen(string, 32), tokens, n_vals, separator); + + for(i = 0; i < n_tokens; i++) { + opt_vals[i] = (uint32_t)atol(tokens[i]); + } + + free(string); + + return n_tokens; +} + +static int +app_parse_ring_conf(const char *conf_str) +{ + int ret; + uint32_t vals[3]; + + ret = app_parse_opt_vals(conf_str, ',', 3, vals); + if (ret != 3) + return ret; + + ring_conf.rx_size = vals[0]; + ring_conf.ring_size = vals[1]; + ring_conf.tx_size = vals[2]; + + return 0; +} + +static int +app_parse_rth_conf(const char *conf_str) +{ + int ret; + uint32_t vals[3]; + + ret = app_parse_opt_vals(conf_str, ',', 3, vals); + if (ret != 3) + return ret; + + rx_thresh.pthresh = (uint8_t)vals[0]; + rx_thresh.hthresh = (uint8_t)vals[1]; + rx_thresh.wthresh = (uint8_t)vals[2]; + + return 0; +} + +static int +app_parse_tth_conf(const char *conf_str) +{ + int ret; + uint32_t vals[3]; + + ret = app_parse_opt_vals(conf_str, ',', 3, vals); + if (ret != 3) + return ret; + + tx_thresh.pthresh = (uint8_t)vals[0]; + tx_thresh.hthresh = (uint8_t)vals[1]; + tx_thresh.wthresh = (uint8_t)vals[2]; + + return 0; +} + +static int +app_parse_flow_conf(const char *conf_str) +{ + int ret; + uint32_t vals[5]; + struct flow_conf *pconf; + uint64_t mask; + + ret = app_parse_opt_vals(conf_str, ',', 6, vals); + if (ret < 4 || ret > 5) + return ret; + + pconf = &qos_conf[nb_pfc]; + + pconf->rx_port = (uint8_t)vals[0]; + pconf->tx_port = (uint8_t)vals[1]; + pconf->rx_core = (uint8_t)vals[2]; + pconf->wt_core = (uint8_t)vals[3]; + if (ret == 5) + pconf->tx_core = (uint8_t)vals[4]; + else + pconf->tx_core = pconf->wt_core; + + if (pconf->rx_core == pconf->wt_core) { + RTE_LOG(ERR, APP, "pfc %u: rx 
thread and worker thread cannot share same core\n", nb_pfc); + return -1; + } + + if (pconf->rx_port >= RTE_MAX_ETHPORTS) { + RTE_LOG(ERR, APP, "pfc %u: invalid rx port %hu index\n", nb_pfc, pconf->rx_port); + return -1; + } + if (pconf->tx_port >= RTE_MAX_ETHPORTS) { + RTE_LOG(ERR, APP, "pfc %u: invalid tx port %hu index\n", nb_pfc, pconf->rx_port); + return -1; + } + + mask = 1lu << pconf->rx_port; + if (app_used_rx_port_mask & mask) { + RTE_LOG(ERR, APP, "pfc %u: rx port %hu is used already\n", nb_pfc, pconf->rx_port); + return -1; + } + app_used_rx_port_mask |= mask; + app_used_port_mask |= mask; + + mask = 1lu << pconf->tx_port; + if (app_used_tx_port_mask & mask) { + RTE_LOG(ERR, APP, "pfc %u: port %hu is used already\n", nb_pfc, pconf->tx_port); + return -1; + } + app_used_tx_port_mask |= mask; + app_used_port_mask |= mask; + + mask = 1lu << pconf->rx_core; + app_used_core_mask |= mask; + + mask = 1lu << pconf->wt_core; + app_used_core_mask |= mask; + + mask = 1lu << pconf->tx_core; + app_used_core_mask |= mask; + + nb_pfc++; + + return 0; +} + +static int +app_parse_burst_conf(const char *conf_str) +{ + int ret; + uint32_t vals[4]; + + ret = app_parse_opt_vals(conf_str, ',', 4, vals); + if (ret != 4) + return ret; + + burst_conf.rx_burst = (uint16_t)vals[0]; + burst_conf.ring_burst = (uint16_t)vals[1]; + burst_conf.qos_dequeue = (uint16_t)vals[2]; + burst_conf.tx_burst = (uint16_t)vals[3]; + + return 0; +} + +/* + * Parses the argument given in the command line of the application, + * calculates mask for used cores and initializes EAL with calculated core mask + */ +int +app_parse_args(int argc, char **argv) +{ + int opt, ret; + int option_index; + const char *optname; + char *prgname = argv[0]; + uint32_t i, nb_lcores; + + static struct option lgopts[] = { + { "pfc", 1, 0, 0 }, + { "mst", 1, 0, 0 }, + { "rsz", 1, 0, 0 }, + { "bsz", 1, 0, 0 }, + { "rth", 1, 0, 0 }, + { "tth", 1, 0, 0 }, + { "cfg", 1, 0, 0 }, + { NULL, 0, 0, 0 } + }; + + /* initialize EAL first */ + ret = rte_eal_init(argc, argv); + if (ret < 0) + return -1; + + argc -= ret; + argv += ret; + + /* set en_US locale to print big numbers with ',' */ + setlocale(LC_NUMERIC, "en_US.utf-8"); + + while ((opt = getopt_long(argc, argv, "", + lgopts, &option_index)) != EOF) { + + switch (opt) { + /* long options */ + case 0: + optname = lgopts[option_index].name; + if (str_is(optname, "pfc")) { + ret = app_parse_flow_conf(optarg); + if (ret) { + RTE_LOG(ERR, APP, "Invalid pipe configuration %s\n", optarg); + return -1; + } + break; + } + if (str_is(optname, "mst")) { + app_master_core = (uint32_t)atoi(optarg); + break; + } + if (str_is(optname, "rsz")) { + ret = app_parse_ring_conf(optarg); + if (ret) { + RTE_LOG(ERR, APP, "Invalid ring configuration %s\n", optarg); + return -1; + } + break; + } + if (str_is(optname, "bsz")) { + ret = app_parse_burst_conf(optarg); + if (ret) { + RTE_LOG(ERR, APP, "Invalid burst configuration %s\n", optarg); + return -1; + } + break; + } + if (str_is(optname, "rth")) { + ret = app_parse_rth_conf(optarg); + if (ret) { + RTE_LOG(ERR, APP, "Invalid RX threshold configuration %s\n", optarg); + return -1; + } + break; + } + if (str_is(optname, "tth")) { + ret = app_parse_tth_conf(optarg); + if (ret) { + RTE_LOG(ERR, APP, "Invalid TX threshold configuration %s\n", optarg); + return -1; + } + break; + } + if (str_is(optname, "cfg")) { + cfg_profile = optarg; + break; + } + break; + + default: + app_usage(prgname); + return -1; + } + } + + /* check master core index validity */ + for(i = 0; i <= 
app_master_core; i++) { + if (app_used_core_mask & (1u << app_master_core)) { + RTE_LOG(ERR, APP, "Master core index is not configured properly\n"); + app_usage(prgname); + return -1; + } + } + app_used_core_mask |= 1u << app_master_core; + + if ((app_used_core_mask != app_eal_core_mask()) || + (app_master_core != rte_get_master_lcore())) { + RTE_LOG(ERR, APP, "EAL core mask not configured properly, must be %" PRIx64 + " instead of %" PRIx64 "\n" , app_used_core_mask, app_eal_core_mask()); + return -1; + } + + if (nb_pfc == 0) { + RTE_LOG(ERR, APP, "Packet flow not configured!\n"); + app_usage(prgname); + return -1; + } + + /* sanity check for cores assignment */ + nb_lcores = app_cpu_core_count(); + + for(i = 0; i < nb_pfc; i++) { + if (qos_conf[i].rx_core >= nb_lcores) { + RTE_LOG(ERR, APP, "pfc %u: invalid RX lcore index %u\n", i + 1, + qos_conf[i].rx_core); + return -1; + } + if (qos_conf[i].wt_core >= nb_lcores) { + RTE_LOG(ERR, APP, "pfc %u: invalid WT lcore index %u\n", i + 1, + qos_conf[i].wt_core); + return -1; + } + uint32_t rx_sock = rte_lcore_to_socket_id(qos_conf[i].rx_core); + uint32_t wt_sock = rte_lcore_to_socket_id(qos_conf[i].wt_core); + if (rx_sock != wt_sock) { + RTE_LOG(ERR, APP, "pfc %u: RX and WT must be on the same socket\n", i + 1); + return -1; + } + app_numa_mask |= 1 << rte_lcore_to_socket_id(qos_conf[i].rx_core); + } + + return 0; +} + diff --git a/examples/qos_sched/cfg_file.c b/examples/qos_sched/cfg_file.c new file mode 100755 index 0000000000..85f9c02635 --- /dev/null +++ b/examples/qos_sched/cfg_file.c @@ -0,0 +1,631 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2013 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
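With argument parsing done, the profile named by --cfg is consumed through the small API declared in cfg_file.h. Roughly the intended call sequence, sketched with a section and entry taken from the sample profile.cfg and error handling trimmed:

	#include <stdio.h>
	#include "cfg_file.h"

	static int
	show_frame_overhead(const char *profile)
	{
		struct cfg_file *cfg = cfg_load(profile, 0);
		const char *entry;

		if (cfg == NULL)
			return -1;

		if (cfg_has_section(cfg, "port")) {
			entry = cfg_get_entry(cfg, "port", "frame overhead");
			if (entry != NULL)
				printf("frame overhead = %s\n", entry);
		}

		return cfg_close(cfg);
	}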
+ * + */ + +#include +#include +#include +#include +#include +#include + +#include "cfg_file.h" +#include "main.h" + + +/** when we resize a file structure, how many extra entries + * for new sections do we add in */ +#define CFG_ALLOC_SECTION_BATCH 8 +/** when we resize a section structure, how many extra entries + * for new entries do we add in */ +#define CFG_ALLOC_ENTRY_BATCH 16 + +static unsigned +_strip(char *str, unsigned len) +{ + int newlen = len; + if (len == 0) + return 0; + + if (isspace(str[len-1])) { + /* strip trailing whitespace */ + while (newlen > 0 && isspace(str[newlen - 1])) + str[--newlen] = '\0'; + } + + if (isspace(str[0])) { + /* strip leading whitespace */ + int i,start = 1; + while (isspace(str[start]) && start < newlen) + start++ + ; /* do nothing */ + newlen -= start; + for (i = 0; i < newlen; i++) + str[i] = str[i+start]; + str[i] = '\0'; + } + return newlen; +} + +struct cfg_file * +cfg_load(const char *filename, int flags) +{ + int allocated_sections = CFG_ALLOC_SECTION_BATCH; + int allocated_entries = 0; + int curr_section = -1; + int curr_entry = -1; + char buffer[256]; + int lineno = 0; + struct cfg_file *cfg = NULL; + + FILE *f = fopen(filename, "r"); + if (f == NULL) + return NULL; + + cfg = malloc(sizeof(*cfg) + sizeof(cfg->sections[0]) * allocated_sections); + if (cfg == NULL) + goto error2; + + memset(cfg->sections, 0, sizeof(cfg->sections[0]) * allocated_sections); + + while (fgets(buffer, sizeof(buffer), f) != NULL) { + char *pos = NULL; + size_t len = strnlen(buffer, sizeof(buffer)); + lineno++; + if (len >=sizeof(buffer) - 1 && buffer[len-1] != '\n'){ + printf("Error line %d - no \\n found on string. " + "Check if line too long\n", lineno); + goto error1; + } + if ((pos = memchr(buffer, ';', sizeof(buffer))) != NULL) { + *pos = '\0'; + len = pos - buffer; + } + + len = _strip(buffer, len); + if (buffer[0] != '[' && memchr(buffer, '=', len) == NULL) + continue; + + if (buffer[0] == '[') { + /* section heading line */ + char *end = memchr(buffer, ']', len); + if (end == NULL) { + printf("Error line %d - no terminating '[' found\n", lineno); + goto error1; + } + *end = '\0'; + _strip(&buffer[1], end - &buffer[1]); + + /* close off old section and add start new one */ + if (curr_section >= 0) + cfg->sections[curr_section]->num_entries = curr_entry + 1; + curr_section++; + + /* resize overall struct if we don't have room for more sections */ + if (curr_section == allocated_sections) { + allocated_sections += CFG_ALLOC_SECTION_BATCH; + struct cfg_file *n_cfg = realloc(cfg, sizeof(*cfg) + + sizeof(cfg->sections[0]) * allocated_sections); + if (n_cfg == NULL) { + printf("Error - no more memory\n"); + goto error1; + } + cfg = n_cfg; + } + + /* allocate space for new section */ + allocated_entries = CFG_ALLOC_ENTRY_BATCH; + curr_entry = -1; + cfg->sections[curr_section] = malloc(sizeof(*cfg->sections[0]) + + sizeof(cfg->sections[0]->entries[0]) * allocated_entries); + if (cfg->sections[curr_section] == NULL) { + printf("Error - no more memory\n"); + goto error1; + } + + rte_snprintf(cfg->sections[curr_section]->name, + sizeof(cfg->sections[0]->name), + "%s", &buffer[1]); + } + else { + /* value line */ + if (curr_section < 0) { + printf("Error line %d - value outside of section\n", lineno); + goto error1; + } + + struct cfg_section *sect = cfg->sections[curr_section]; + char *split[2]; + if (rte_strsplit(buffer, sizeof(buffer), split, 2, '=') != 2) { + printf("Error at line %d - cannot split string\n", lineno); + goto error1; + } + + curr_entry++; + if 
(curr_entry == allocated_entries) { + allocated_entries += CFG_ALLOC_ENTRY_BATCH; + struct cfg_section *n_sect = realloc(sect, sizeof(*sect) + + sizeof(sect->entries[0]) * allocated_entries); + if (n_sect == NULL) { + printf("Error - no more memory\n"); + goto error1; + } + sect = cfg->sections[curr_section] = n_sect; + } + + sect->entries[curr_entry] = malloc(sizeof(*sect->entries[0])); + if (sect->entries[curr_entry] == NULL) { + printf("Error - no more memory\n"); + goto error1; + } + + struct cfg_entry *entry = sect->entries[curr_entry]; + rte_snprintf(entry->name, sizeof(entry->name), "%s", split[0]); + rte_snprintf(entry->value, sizeof(entry->value), "%s", split[1]); + _strip(entry->name, strnlen(entry->name, sizeof(entry->name))); + _strip(entry->value, strnlen(entry->value, sizeof(entry->value))); + } + } + fclose(f); + cfg->flags = flags; + cfg->sections[curr_section]->num_entries = curr_entry + 1; + cfg->num_sections = curr_section + 1; + return cfg; + +error1: + cfg_close(cfg); +error2: + fclose(f); + return NULL; +} + + +int cfg_close(struct cfg_file *cfg) +{ + int i, j; + + if (cfg == NULL) + return -1; + + for(i = 0; i < cfg->num_sections; i++) { + if (cfg->sections[i] != NULL) { + if (cfg->sections[i]->num_entries) { + for(j = 0; j < cfg->sections[i]->num_entries; j++) { + if (cfg->sections[i]->entries[j] != NULL) + free(cfg->sections[i]->entries[j]); + } + } + free(cfg->sections[i]); + } + } + free(cfg); + + return 0; +} + +int +cfg_num_sections(struct cfg_file *cfg, const char *sectionname, size_t length) +{ + int i; + int num_sections = 0; + for (i = 0; i < cfg->num_sections; i++) { + if (strncmp(cfg->sections[i]->name, sectionname, length) == 0) + num_sections++; + } + return num_sections; +} + +int +cfg_sections(struct cfg_file *cfg, char *sections[], int max_sections) +{ + int i; + for (i = 0; i < cfg->num_sections && i < max_sections; i++) { + rte_snprintf(sections[i], CFG_NAME_LEN, "%s", cfg->sections[i]->name); + } + return i; +} + +static const struct cfg_section * +_get_section(struct cfg_file *cfg, const char *sectionname) +{ + int i; + for (i = 0; i < cfg->num_sections; i++) { + if (strncmp(cfg->sections[i]->name, sectionname, + sizeof(cfg->sections[0]->name)) == 0) + return cfg->sections[i]; + } + return NULL; +} + +int +cfg_has_section(struct cfg_file *cfg, const char *sectionname) +{ + return (_get_section(cfg, sectionname) != NULL); +} + +int +cfg_section_num_entries(struct cfg_file *cfg, const char *sectionname) +{ + const struct cfg_section *s = _get_section(cfg, sectionname); + if (s == NULL) + return -1; + return s->num_entries; +} + + +int +cfg_section_entries(struct cfg_file *cfg, const char *sectionname, + struct cfg_entry *entries, int max_entries) +{ + int i; + const struct cfg_section *sect = _get_section(cfg, sectionname); + if (sect == NULL) + return -1; + for (i = 0; i < max_entries && i < sect->num_entries; i++) + entries[i] = *sect->entries[i]; + return i; +} + +const char * +cfg_get_entry(struct cfg_file *cfg, const char *sectionname, + const char *entryname) +{ + int i; + const struct cfg_section *sect = _get_section(cfg, sectionname); + if (sect == NULL) + return NULL; + for (i = 0; i < sect->num_entries; i++) + if (strncmp(sect->entries[i]->name, entryname, CFG_NAME_LEN) == 0) + return sect->entries[i]->value; + return NULL; +} + +int +cfg_has_entry(struct cfg_file *cfg, const char *sectionname, + const char *entryname) +{ + return (cfg_get_entry(cfg, sectionname, entryname) != NULL); +} + + +int +cfg_load_port(struct cfg_file *cfg, struct 
rte_sched_port_params *port_params) +{ + const char *entry; + int j; + + if (!cfg || !port_params) + return -1; + + entry = cfg_get_entry(cfg, "port", "frame overhead"); + if (entry) + port_params->frame_overhead = (uint32_t)atoi(entry); + + entry = cfg_get_entry(cfg, "port", "number of subports per port"); + if (entry) + port_params->n_subports_per_port = (uint32_t)atoi(entry); + + entry = cfg_get_entry(cfg, "port", "number of pipes per subport"); + if (entry) + port_params->n_pipes_per_subport = (uint32_t)atoi(entry); + + entry = cfg_get_entry(cfg, "port", "queue sizes"); + if (entry) { + char *next; + + for(j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) { + port_params->qsize[j] = (uint16_t)strtol(entry, &next, 10); + if (next == NULL) + break; + entry = next; + } + } + +#ifdef RTE_SCHED_RED + for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) { + char str[32]; + + /* Parse WRED min thresholds */ + rte_snprintf(str, sizeof(str), "tc %d wred min", j); + entry = cfg_get_entry(cfg, "red", str); + if (entry) { + char *next; + int k; + /* for each packet colour (green, yellow, red) */ + for (k = 0; k < e_RTE_METER_COLORS; k++) { + port_params->red_params[j][k].min_th + = (uint16_t)strtol(entry, &next, 10); + if (next == NULL) + break; + entry = next; + } + } + + /* Parse WRED max thresholds */ + rte_snprintf(str, sizeof(str), "tc %d wred max", j); + entry = cfg_get_entry(cfg, "red", str); + if (entry) { + char *next; + int k; + /* for each packet colour (green, yellow, red) */ + for (k = 0; k < e_RTE_METER_COLORS; k++) { + port_params->red_params[j][k].max_th + = (uint16_t)strtol(entry, &next, 10); + if (next == NULL) + break; + entry = next; + } + } + + /* Parse WRED inverse mark probabilities */ + rte_snprintf(str, sizeof(str), "tc %d wred inv prob", j); + entry = cfg_get_entry(cfg, "red", str); + if (entry) { + char *next; + int k; + /* for each packet colour (green, yellow, red) */ + for (k = 0; k < e_RTE_METER_COLORS; k++) { + port_params->red_params[j][k].maxp_inv + = (uint8_t)strtol(entry, &next, 10); + + if (next == NULL) + break; + entry = next; + } + } + + /* Parse WRED EWMA filter weights */ + rte_snprintf(str, sizeof(str), "tc %d wred weight", j); + entry = cfg_get_entry(cfg, "red", str); + if (entry) { + char *next; + int k; + /* for each packet colour (green, yellow, red) */ + for (k = 0; k < e_RTE_METER_COLORS; k++) { + port_params->red_params[j][k].wq_log2 + = (uint8_t)strtol(entry, &next, 10); + if (next == NULL) + break; + entry = next; + } + } + } +#endif /* RTE_SCHED_RED */ + + return 0; +} + +int +cfg_load_pipe(struct cfg_file *cfg, struct rte_sched_pipe_params *pipe_params) +{ + int i, j; + char *next; + const char *entry; + int profiles; + + if (!cfg || !pipe_params) + return -1; + + profiles = cfg_num_sections(cfg, "pipe profile", sizeof("pipe profile") - 1); + port_params.n_pipe_profiles = profiles; + + for (j = 0; j < profiles; j++) { + char pipe_name[32]; + rte_snprintf(pipe_name, sizeof(pipe_name), "pipe profile %d", j); + + entry = cfg_get_entry(cfg, pipe_name, "tb rate"); + if (entry) + pipe_params[j].tb_rate = (uint32_t)atoi(entry); + + entry = cfg_get_entry(cfg, pipe_name, "tb size"); + if (entry) + pipe_params[j].tb_size = (uint32_t)atoi(entry); + + entry = cfg_get_entry(cfg, pipe_name, "tc period"); + if (entry) + pipe_params[j].tc_period = (uint32_t)atoi(entry); + + entry = cfg_get_entry(cfg, pipe_name, "tc 0 rate"); + if (entry) + pipe_params[j].tc_rate[0] = (uint32_t)atoi(entry); + + entry = cfg_get_entry(cfg, pipe_name, "tc 1 rate"); + if 
(entry) + pipe_params[j].tc_rate[1] = (uint32_t)atoi(entry); + + entry = cfg_get_entry(cfg, pipe_name, "tc 2 rate"); + if (entry) + pipe_params[j].tc_rate[2] = (uint32_t)atoi(entry); + + entry = cfg_get_entry(cfg, pipe_name, "tc 3 rate"); + if (entry) + pipe_params[j].tc_rate[3] = (uint32_t)atoi(entry); + +#ifdef RTE_SCHED_SUBPORT_TC_OV + entry = cfg_get_entry(cfg, pipe_name, "tc 0 oversubscription weight"); + if (entry) + pipe_params[j].tc_ov_weight[0] = (uint8_t)atoi(entry); + + entry = cfg_get_entry(cfg, pipe_name, "tc 1 oversubscription weight"); + if (entry) + pipe_params[j].tc_ov_weight[1] = (uint8_t)atoi(entry); + + entry = cfg_get_entry(cfg, pipe_name, "tc 2 oversubscription weight"); + if (entry) + pipe_params[j].tc_ov_weight[2] = (uint8_t)atoi(entry); + + entry = cfg_get_entry(cfg, pipe_name, "tc 3 oversubscription weight"); + if (entry) + pipe_params[j].tc_ov_weight[3] = (uint8_t)atoi(entry); +#endif + + entry = cfg_get_entry(cfg, pipe_name, "tc 0 wrr weights"); + if (entry) { + for(i = 0; i < RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; i++) { + pipe_params[j].wrr_weights[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE*0 + i] = + (uint8_t)strtol(entry, &next, 10); + if (next == NULL) + break; + entry = next; + } + } + entry = cfg_get_entry(cfg, pipe_name, "tc 1 wrr weights"); + if (entry) { + for(i = 0; i < RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; i++) { + pipe_params[j].wrr_weights[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE*1 + i] = + (uint8_t)strtol(entry, &next, 10); + if (next == NULL) + break; + entry = next; + } + } + entry = cfg_get_entry(cfg, pipe_name, "tc 2 wrr weights"); + if (entry) { + for(i = 0; i < RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; i++) { + pipe_params[j].wrr_weights[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE*2 + i] = + (uint8_t)strtol(entry, &next, 10); + if (next == NULL) + break; + entry = next; + } + } + entry = cfg_get_entry(cfg, pipe_name, "tc 3 wrr weights"); + if (entry) { + for(i = 0; i < RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; i++) { + pipe_params[j].wrr_weights[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE*3 + i] = + (uint8_t)strtol(entry, &next, 10); + if (next == NULL) + break; + entry = next; + } + } + } + return 0; +} + +int +cfg_load_subport(struct cfg_file *cfg, struct rte_sched_subport_params *subport_params) +{ + const char *entry; + int i, j, k; + + if (!cfg || !subport_params) + return -1; + + memset(app_pipe_to_profile, -1, sizeof(app_pipe_to_profile)); + + for (i = 0; i < MAX_SCHED_SUBPORTS; i++) { + char sec_name[CFG_NAME_LEN]; + rte_snprintf(sec_name, sizeof(sec_name), "subport %d", i); + + if (cfg_has_section(cfg, sec_name)) { + entry = cfg_get_entry(cfg, sec_name, "tb rate"); + if (entry) + subport_params[i].tb_rate = (uint32_t)atoi(entry); + + entry = cfg_get_entry(cfg, sec_name, "tb size"); + if (entry) + subport_params[i].tb_size = (uint32_t)atoi(entry); + + entry = cfg_get_entry(cfg, sec_name, "tc period"); + if (entry) + subport_params[i].tc_period = (uint32_t)atoi(entry); + +#ifdef RTE_SCHED_SUBPORT_TC_OV + entry = cfg_get_entry(cfg, sec_name, "tc oversubscription period"); + if (entry) + subport_params[i].tc_ov_period = (uint32_t)atoi(entry); +#endif + + entry = cfg_get_entry(cfg, sec_name, "tc 0 rate"); + if (entry) + subport_params[i].tc_rate[0] = (uint32_t)atoi(entry); + + entry = cfg_get_entry(cfg, sec_name, "tc 1 rate"); + if (entry) + subport_params[i].tc_rate[1] = (uint32_t)atoi(entry); + + entry = cfg_get_entry(cfg, sec_name, "tc 2 rate"); + if (entry) + subport_params[i].tc_rate[2] = (uint32_t)atoi(entry); + + entry = cfg_get_entry(cfg, sec_name, "tc 3 rate"); + if (entry) + 
subport_params[i].tc_rate[3] = (uint32_t)atoi(entry); + + int n_entries = cfg_section_num_entries(cfg, sec_name); + struct cfg_entry entries[n_entries]; + + cfg_section_entries(cfg, sec_name, entries, n_entries); + + for (j = 0; j < n_entries; j++) { + if (strncmp("pipe", entries[j].name, sizeof("pipe") - 1) == 0) { + int profile; + char *tokens[2] = {NULL, NULL}; + int n_tokens; + int begin, end; + + profile = atoi(entries[j].value); + n_tokens = rte_strsplit(&entries[j].name[sizeof("pipe")], + strnlen(entries[j].name, CFG_NAME_LEN), tokens, 2, '-'); + + begin = atoi(tokens[0]); + if (n_tokens == 2) + end = atoi(tokens[1]); + else + end = begin; + + if (end >= MAX_SCHED_PIPES || begin > end) + return -1; + + for (k = begin; k <= end; k++) { + char profile_name[CFG_NAME_LEN]; + + rte_snprintf(profile_name, sizeof(profile_name), + "pipe profile %d", profile); + if (cfg_has_section(cfg, profile_name)) + app_pipe_to_profile[i][k] = profile; + else + rte_exit(EXIT_FAILURE, "Wrong pipe profile %s\n", + entries[j].value); + + } + } + } + } + } + + return 0; +} + + diff --git a/examples/qos_sched/cfg_file.h b/examples/qos_sched/cfg_file.h new file mode 100755 index 0000000000..2e265e689b --- /dev/null +++ b/examples/qos_sched/cfg_file.h @@ -0,0 +1,103 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2013 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#ifndef __CFG_FILE_H__ +#define __CFG_FILE_H__ + +#include + +#define CFG_NAME_LEN 32 +#define CFG_VALUE_LEN 64 + +struct cfg_entry { + char name[CFG_NAME_LEN]; + char value[CFG_VALUE_LEN]; +}; + +struct cfg_section { + char name[CFG_NAME_LEN]; + int num_entries; + struct cfg_entry *entries[0]; +}; + +struct cfg_file { + int flags; + int num_sections; + struct cfg_section *sections[0]; +}; + + +int cfg_load_port(struct cfg_file *cfg, struct rte_sched_port_params *port); + +int cfg_load_pipe(struct cfg_file *cfg, struct rte_sched_pipe_params *pipe); + +int cfg_load_subport(struct cfg_file *cfg, struct rte_sched_subport_params *subport); + + +/* reads a config file from disk and returns a handle to the config + * 'flags' is reserved for future use and must be 0 + */ +struct cfg_file *cfg_load(const char *filename, int flags); + +/* returns the number of sections in the config */ +int cfg_num_sections(struct cfg_file *cfg, const char *sec_name, size_t length); + +/* fills the array "sections" with the names of all the sections in the file + * (up to a max of max_sections). + * NOTE: buffers in the sections array must be at least CFG_NAME_LEN big + */ +int cfg_sections(struct cfg_file *cfg, char *sections[], int max_sections); + +/* true if the named section exists, false otherwise */ +int cfg_has_section(struct cfg_file *cfg, const char *sectionname); + +/* returns the number of entries in a section */ +int cfg_section_num_entries(struct cfg_file *cfg, const char *sectionname); + +/* returns the entries in a section as key-value pairs in the "entries" array */ +int cfg_section_entries(struct cfg_file *cfg, const char *sectionname, + struct cfg_entry *entries, int max_entries); + +/* returns a pointer to the value of the named entry in the named section */ +const char *cfg_get_entry(struct cfg_file *cfg, const char *sectionname, + const char *entryname); + +/* true if the given entry exists in the given section, false otherwise */ +int cfg_has_entry(struct cfg_file *cfg, const char *sectionname, + const char *entryname); + +/* cleans up memory allocated by cfg_load() */ +int cfg_close(struct cfg_file *cfg); + +#endif diff --git a/examples/qos_sched/init.c b/examples/qos_sched/init.c new file mode 100755 index 0000000000..1654c73504 --- /dev/null +++ b/examples/qos_sched/init.c @@ -0,0 +1,385 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2013 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
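Both cfg_file.h structures above end in a zero-length array, so a section header and its entry pointers share one allocation that cfg_load() grows in batches via realloc(). A stripped-down sketch of that growth step (batch size arbitrary):

	#include <stdlib.h>
	#include "cfg_file.h"

	#define BATCH 16

	/* Grow the flexible entries[] tail of a section by one batch. */
	static struct cfg_section *
	grow_section(struct cfg_section *sect, int *allocated)
	{
		struct cfg_section *tmp;

		tmp = realloc(sect, sizeof(*sect) +
				sizeof(sect->entries[0]) * (*allocated + BATCH));
		if (tmp == NULL)
			return NULL;	/* old block is still valid, caller keeps sect */

		*allocated += BATCH;
		return tmp;
	}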
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "main.h" +#include "cfg_file.h" + +uint32_t app_numa_mask = 0; +static uint32_t app_inited_port_mask = 0; + +int app_pipe_to_profile[MAX_SCHED_SUBPORTS][MAX_SCHED_PIPES]; + +#define MAX_NAME_LEN 32 + +struct ring_conf ring_conf = { + .rx_size = APP_RX_DESC_DEFAULT, + .ring_size = APP_RING_SIZE, + .tx_size = APP_TX_DESC_DEFAULT, +}; + +struct burst_conf burst_conf = { + .rx_burst = MAX_PKT_RX_BURST, + .ring_burst = PKT_ENQUEUE, + .qos_dequeue = PKT_DEQUEUE, + .tx_burst = MAX_PKT_TX_BURST, +}; + +struct ring_thresh rx_thresh = { + .pthresh = RX_PTHRESH, + .hthresh = RX_HTHRESH, + .wthresh = RX_WTHRESH, +}; + +struct ring_thresh tx_thresh = { + .pthresh = TX_PTHRESH, + .hthresh = TX_HTHRESH, + .wthresh = TX_WTHRESH, +}; + +uint32_t nb_pfc; +const char *cfg_profile = NULL; +struct flow_conf qos_conf[MAX_DATA_STREAMS]; + +static const struct rte_eth_conf port_conf = { + .rxmode = { + .max_rx_pkt_len = ETHER_MAX_LEN, + .split_hdr_size = 0, + .header_split = 0, /**< Header Split disabled */ + .hw_ip_checksum = 0, /**< IP checksum offload disabled */ + .hw_vlan_filter = 0, /**< VLAN filtering disabled */ + .jumbo_frame = 0, /**< Jumbo Frame Support disabled */ + .hw_strip_crc = 0, /**< CRC stripped by hardware */ + }, + .txmode = { + .mq_mode = ETH_DCB_NONE, + }, +}; + +static int +app_init_port(uint8_t portid, struct rte_mempool *mp) +{ + int ret; + struct rte_eth_link link; + struct rte_eth_rxconf rx_conf; + struct rte_eth_txconf tx_conf; + + /* check if port already initialized (multistream configuration) */ + if (app_inited_port_mask & (1u << portid)) + return 0; + + rx_conf.rx_thresh.pthresh = rx_thresh.pthresh; + rx_conf.rx_thresh.hthresh = rx_thresh.hthresh; + rx_conf.rx_thresh.wthresh = rx_thresh.wthresh; + rx_conf.rx_free_thresh = 32; + rx_conf.rx_drop_en = 0; + + tx_conf.tx_thresh.pthresh = tx_thresh.pthresh; + tx_conf.tx_thresh.hthresh = tx_thresh.hthresh; + tx_conf.tx_thresh.wthresh = tx_thresh.wthresh; + tx_conf.tx_free_thresh = 0; + tx_conf.tx_rs_thresh = 0; + tx_conf.txq_flags = ETH_TXQ_FLAGS_NOMULTSEGS | ETH_TXQ_FLAGS_NOOFFLOADS; + + /* init port */ + RTE_LOG(INFO, APP, "Initializing port %hu... 
", portid); + fflush(stdout); + ret = rte_eth_dev_configure(portid, 1, 1, &port_conf); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Cannot configure device: err=%d, port=%hu\n", + ret, portid); + + /* init one RX queue */ + fflush(stdout); + ret = rte_eth_rx_queue_setup(portid, 0, (uint16_t)ring_conf.rx_size, + rte_eth_dev_socket_id(portid), &rx_conf, mp); + if (ret < 0) + rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup: err=%d, port=%hu\n", + ret, portid); + + /* init one TX queue */ + fflush(stdout); + ret = rte_eth_tx_queue_setup(portid, 0, + (uint16_t)ring_conf.tx_size, rte_eth_dev_socket_id(portid), &tx_conf); + if (ret < 0) + rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup: err=%d, " + "port=%hu queue=%d\n", + ret, portid, 0); + + /* Start device */ + ret = rte_eth_dev_start(portid); + if (ret < 0) + rte_exit(EXIT_FAILURE, "rte_pmd_port_start: err=%d, port=%hu\n", + ret, portid); + + printf("done: "); + + /* get link status */ + rte_eth_link_get(portid, &link); + if (link.link_status) { + printf(" Link Up - speed %u Mbps - %s\n", + (uint32_t) link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? + ("full-duplex") : ("half-duplex\n")); + } else { + printf(" Link Down\n"); + } + rte_eth_promiscuous_enable(portid); + + /* mark port as initialized */ + app_inited_port_mask |= 1u << portid; + + return 0; +} + +static struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = { + { + .tb_rate = 1250000000, + .tb_size = 1000000, + + .tc_rate = {1250000000, 1250000000, 1250000000, 1250000000}, + .tc_period = 10, +#ifdef RTE_SCHED_SUBPORT_TC_OV + .tc_ov_period = 10, +#endif + }, +}; + +static struct rte_sched_pipe_params pipe_profiles[RTE_SCHED_PIPE_PROFILES_PER_PORT] = { + { /* Profile #0 */ + .tb_rate = 305175, + .tb_size = 1000000, + + .tc_rate = {305175, 305175, 305175, 305175}, + .tc_period = 40, +#ifdef RTE_SCHED_SUBPORT_TC_OV + .tc_ov_weight = {1, 1, 1, 1}, +#endif + + .wrr_weights = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, + }, +}; + +struct rte_sched_port_params port_params = { + .name = "port_0", + .socket = 0, /* computed */ + .rate = 0, /* computed */ + .frame_overhead = RTE_SCHED_FRAME_OVERHEAD_DEFAULT, + .n_subports_per_port = 1, + .n_pipes_per_subport = 4096, + .qsize = {64, 64, 64, 64}, + .pipe_profiles = pipe_profiles, + .n_pipe_profiles = 1, + +#ifdef RTE_SCHED_RED + .red_params = { + /* Traffic Class 0 Colors Green / Yellow / Red */ + [0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9}, + [0][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9}, + [0][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9}, + + /* Traffic Class 1 - Colors Green / Yellow / Red */ + [1][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9}, + [1][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9}, + [1][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9}, + + /* Traffic Class 2 - Colors Green / Yellow / Red */ + [2][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9}, + [2][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9}, + [2][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9}, + + /* Traffic Class 3 - Colors Green / Yellow / Red */ + [3][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9}, + [3][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9}, + [3][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9} + } +#endif /* RTE_SCHED_RED */ +}; + +static struct rte_sched_port * +app_init_sched_port(uint32_t portid, uint32_t 
socketid) +{ + static char port_name[32]; /* static as referenced from global port_params*/ + struct rte_eth_link link; + struct rte_sched_port *port = NULL; + uint32_t pipe, subport; + int err; + + rte_eth_link_get((uint8_t)portid, &link); + + port_params.socket = socketid; + port_params.rate = (uint64_t) link.link_speed * 1000 * 1000 / 8; + rte_snprintf(port_name, sizeof(port_name), "port_%d", portid); + port_params.name = port_name; + + port = rte_sched_port_config(&port_params); + if (port == NULL){ + rte_exit(EXIT_FAILURE, "Unable to config sched port\n"); + } + + for (subport = 0; subport < port_params.n_subports_per_port; subport ++) { + err = rte_sched_subport_config(port, subport, &subport_params[subport]); + if (err) { + rte_exit(EXIT_FAILURE, "Unable to config sched subport %u, err=%d\n", + subport, err); + } + + for (pipe = 0; pipe < port_params.n_pipes_per_subport; pipe ++) { + if (app_pipe_to_profile[subport][pipe] != -1) { + err = rte_sched_pipe_config(port, subport, pipe, + app_pipe_to_profile[subport][pipe]); + if (err) { + rte_exit(EXIT_FAILURE, "Unable to config sched pipe %u " + "for profile %d, err=%d\n", pipe, + app_pipe_to_profile[subport][pipe], err); + } + } + } + } + + return port; +} + +static int +app_load_cfg_profile(const char *profile) +{ + if (profile == NULL) + return 0; + + struct cfg_file *cfg_file = cfg_load(profile, 0); + if (cfg_file == NULL) + rte_exit(EXIT_FAILURE, "Cannot load configuration profile %s\n", profile); + + cfg_load_port(cfg_file, &port_params); + cfg_load_subport(cfg_file, subport_params); + cfg_load_pipe(cfg_file, pipe_profiles); + + cfg_close(cfg_file); + + return 0; +} + +int app_init(void) +{ + uint32_t i; + char ring_name[MAX_NAME_LEN]; + char pool_name[MAX_NAME_LEN]; + + /* init driver(s) */ + if (rte_pmd_init_all() < 0) + rte_exit(EXIT_FAILURE, "Cannot init PMD\n"); + + if (rte_eal_pci_probe() < 0) + rte_exit(EXIT_FAILURE, "Cannot probe PCI\n"); + + if (rte_eth_dev_count() == 0) + rte_exit(EXIT_FAILURE, "No Ethernet port - bye\n"); + + /* load configuration profile */ + if (app_load_cfg_profile(cfg_profile) != 0) + rte_exit(EXIT_FAILURE, "Invalid configuration profile\n"); + + /* Initialize each active flow */ + for(i = 0; i < nb_pfc; i++) { + uint32_t socket = rte_lcore_to_socket_id(qos_conf[i].rx_core); + struct rte_ring *ring; + + rte_snprintf(ring_name, MAX_NAME_LEN, "ring-%u-%u", i, qos_conf[i].rx_core); + ring = rte_ring_lookup(ring_name); + if (ring == NULL) + qos_conf[i].rx_ring = rte_ring_create(ring_name, ring_conf.ring_size, + socket, RING_F_SP_ENQ | RING_F_SC_DEQ); + else + qos_conf[i].rx_ring = ring; + + rte_snprintf(ring_name, MAX_NAME_LEN, "ring-%u-%u", i, qos_conf[i].tx_core); + ring = rte_ring_lookup(ring_name); + if (ring == NULL) + qos_conf[i].tx_ring = rte_ring_create(ring_name, ring_conf.ring_size, + socket, RING_F_SP_ENQ | RING_F_SC_DEQ); + else + qos_conf[i].tx_ring = ring; + + + /* create the mbuf pools for each RX Port */ + rte_snprintf(pool_name, MAX_NAME_LEN, "mbuf_pool%u", i); + qos_conf[i].mbuf_pool = rte_mempool_create(pool_name, NB_MBUF, MBUF_SIZE, + burst_conf.rx_burst * 4, + sizeof(struct rte_pktmbuf_pool_private), + rte_pktmbuf_pool_init, NULL, + rte_pktmbuf_init, NULL, + rte_eth_dev_socket_id(qos_conf[i].rx_port), + 0); + if (qos_conf[i].mbuf_pool == NULL) + rte_exit(EXIT_FAILURE, "Cannot init mbuf pool for socket %u\n", i); + + //printf("MP = %d\n", rte_mempool_count(qos_conf[i].app_pktmbuf_pool)); + + app_init_port(qos_conf[i].rx_port, qos_conf[i].mbuf_pool); + 
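+		/* app_init_port() is guarded by app_inited_port_mask, so this
+		 * second call is a no-op when rx_port == tx_port; the TX port
+		 * also reuses the RX port's mbuf pool. */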
app_init_port(qos_conf[i].tx_port, qos_conf[i].mbuf_pool); + + qos_conf[i].sched_port = app_init_sched_port(qos_conf[i].rx_port, socket); + } + + RTE_LOG(INFO, APP, "time stamp clock running at %" PRIu64 " Hz\n", + rte_get_timer_hz()); + + RTE_LOG(INFO, APP, "Ring sizes: NIC RX = %u, Mempool = %d SW queue = %u," + "NIC TX = %u\n", ring_conf.rx_size, NB_MBUF, ring_conf.ring_size, + ring_conf.tx_size); + + RTE_LOG(INFO, APP, "Burst sizes: RX read = %hu, RX write = %hu,\n" + " Worker read/QoS enqueue = %hu,\n" + " QoS dequeue = %hu, Worker write = %hu\n", + burst_conf.rx_burst, burst_conf.ring_burst, burst_conf.ring_burst, + burst_conf.qos_dequeue, burst_conf.tx_burst); + + RTE_LOG(INFO, APP, "NIC thresholds RX (p = %hhu, h = %hhu, w = %hhu)," + "TX (p = %hhu, h = %hhu, w = %hhu)\n", + rx_thresh.pthresh, rx_thresh.hthresh, rx_thresh.wthresh, + tx_thresh.pthresh, tx_thresh.hthresh, tx_thresh.wthresh); + + return 0; +} diff --git a/examples/qos_sched/main.c b/examples/qos_sched/main.c new file mode 100755 index 0000000000..b6cbe3572a --- /dev/null +++ b/examples/qos_sched/main.c @@ -0,0 +1,246 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2013 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "main.h" + +#define APP_MODE_NONE 0 +#define APP_RX_MODE 1 +#define APP_WT_MODE 2 +#define APP_TX_MODE 4 + + +/* main processing loop */ +static int +app_main_loop(__attribute__((unused))void *dummy) +{ + uint32_t lcore_id; + uint32_t i, mode; + uint32_t rx_idx = 0; + uint32_t wt_idx = 0; + uint32_t tx_idx = 0; + struct thread_conf *rx_confs[MAX_DATA_STREAMS]; + struct thread_conf *wt_confs[MAX_DATA_STREAMS]; + struct thread_conf *tx_confs[MAX_DATA_STREAMS]; + + memset(rx_confs, 0, sizeof(rx_confs)); + memset(wt_confs, 0, sizeof(wt_confs)); + memset(tx_confs, 0, sizeof(tx_confs)); + + + mode = APP_MODE_NONE; + lcore_id = rte_lcore_id(); + + for (i = 0; i < nb_pfc; i++) { + struct flow_conf *flow = &qos_conf[i]; + + if (flow->rx_core == lcore_id) { + flow->rx_thread.rx_port = flow->rx_port; + flow->rx_thread.rx_ring = flow->rx_ring; + flow->rx_thread.rx_queue = flow->rx_queue; + + rx_confs[rx_idx++] = &flow->rx_thread; + + mode |= APP_RX_MODE; + } + if (flow->tx_core == lcore_id) { + flow->tx_thread.tx_port = flow->tx_port; + flow->tx_thread.tx_ring = flow->tx_ring; + flow->tx_thread.tx_queue = flow->tx_queue; + + tx_confs[tx_idx++] = &flow->tx_thread; + + mode |= APP_TX_MODE; + } + if (flow->wt_core == lcore_id) { + flow->wt_thread.rx_ring = flow->rx_ring; + flow->wt_thread.tx_ring = flow->tx_ring; + flow->wt_thread.tx_port = flow->tx_port; + flow->wt_thread.sched_port = flow->sched_port; + + wt_confs[wt_idx++] = &flow->wt_thread; + + mode |= APP_WT_MODE; + } + } + + if (mode == APP_MODE_NONE) { + RTE_LOG(INFO, APP, "lcore %u has nothing to do\n", lcore_id); + return -1; + } + + if (mode == (APP_RX_MODE | APP_WT_MODE)) { + RTE_LOG(INFO, APP, "lcore %u was configured for both RX and WT !!!\n", + lcore_id); + return -1; + } + + RTE_LOG(INFO, APP, "entering main loop on lcore %u\n", lcore_id); + /* initialize mbuf memory */ + if (mode == APP_RX_MODE) { + for (i = 0; i < rx_idx; i++) { + RTE_LOG(INFO, APP, "flow %u lcoreid %u reading port %hu\n", + i, lcore_id, rx_confs[i]->rx_port); + } + + app_rx_thread(rx_confs); + } + else if (mode == (APP_TX_MODE | APP_WT_MODE)) { + for (i = 0; i < wt_idx; i++) { + wt_confs[i]->m_table = rte_malloc("table_wt", sizeof(struct rte_mbuf *) + * burst_conf.tx_burst, CACHE_LINE_SIZE); + + if (wt_confs[i]->m_table == NULL) + rte_panic("flow %u unable to allocate memory buffer\n", i); + + RTE_LOG(INFO, APP, "flow %u lcoreid %u sched+write port %hu\n", + i, lcore_id, wt_confs[i]->tx_port); + } + + app_mixed_thread(wt_confs); + } + else if (mode == APP_TX_MODE) { + for (i = 0; i < tx_idx; i++) { + tx_confs[i]->m_table = rte_malloc("table_tx", sizeof(struct rte_mbuf *) + * burst_conf.tx_burst, CACHE_LINE_SIZE); + + if (tx_confs[i]->m_table == NULL) + rte_panic("flow %u unable to allocate memory buffer\n", i); + + RTE_LOG(INFO, APP, "flow %u lcoreid %u writing port %hu\n", + i, lcore_id, tx_confs[i]->tx_port); + } + + app_tx_thread(tx_confs); + } + else if (mode == APP_WT_MODE){ + for (i = 0; i < wt_idx; i++) { + RTE_LOG(INFO, APP, "flow %u lcoreid %u scheduling \n", i, lcore_id); + } + + app_worker_thread(wt_confs); + } + + return 0; +} + +static void +app_stat(void) +{ + uint32_t i; + struct rte_eth_stats stats; + static struct rte_eth_stats rx_stats[MAX_DATA_STREAMS]; + static struct rte_eth_stats tx_stats[MAX_DATA_STREAMS]; + + /* print statistics */ + for(i = 0; i < nb_pfc; i++) { + struct flow_conf *flow = &qos_conf[i]; + + 
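+		/* counters below are printed as deltas against the previous sample;
+		 * MAIN calls app_stat() once per second (see the sleep(1) loop below) */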
rte_eth_stats_get(flow->rx_port, &stats); + printf("\nRX port %hu: rx: %"PRIu64 " err: %"PRIu64 " no_mbuf: %"PRIu64 "\n", + flow->rx_port, + stats.ipackets - rx_stats[i].ipackets, + stats.ierrors - rx_stats[i].ierrors, + stats.rx_nombuf - rx_stats[i].rx_nombuf); + memcpy(&rx_stats[i], &stats, sizeof(stats)); + + rte_eth_stats_get(flow->tx_port, &stats); + printf("TX port %hu: tx: %" PRIu64 " err: %" PRIu64 "\n", + flow->tx_port, + stats.opackets - tx_stats[i].opackets, + stats.oerrors - tx_stats[i].oerrors); + memcpy(&tx_stats[i], &stats, sizeof(stats)); + + //printf("MP = %d\n", rte_mempool_count(conf->app_pktmbuf_pool)); + +#if APP_COLLECT_STAT + printf("-------+------------+------------+\n"); + printf(" | received | dropped |\n"); + printf("-------+------------+------------+\n"); + printf(" RX | %10" PRIu64 " | %10" PRIu64 " |\n", + flow->rx_thread.stat.nb_rx, + flow->rx_thread.stat.nb_drop); + printf("QOS+TX | %10" PRIu64 " | %10" PRIu64 " | pps: %"PRIu64 " \n", + flow->wt_thread.stat.nb_rx, + flow->wt_thread.stat.nb_drop, + flow->wt_thread.stat.nb_rx - flow->wt_thread.stat.nb_drop); + printf("-------+------------+------------+\n"); + + memset(&flow->rx_thread.stat, 0, sizeof(struct thread_stat)); + memset(&flow->wt_thread.stat, 0, sizeof(struct thread_stat)); +#endif + } +} + + + +int +MAIN(int argc, char **argv) +{ + int ret; + + ret = app_parse_args(argc, argv); + if (ret < 0) + return -1; + + ret = app_init(); + if (ret < 0) + return -1; + + + /* launch per-lcore init on every lcore */ + rte_eal_mp_remote_launch(app_main_loop, NULL, SKIP_MASTER); + + /* print statistics every second */ + while(1) { + sleep(1); + app_stat(); + } +} + + + diff --git a/examples/qos_sched/main.h b/examples/qos_sched/main.h new file mode 100755 index 0000000000..243064c51d --- /dev/null +++ b/examples/qos_sched/main.h @@ -0,0 +1,186 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2013 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#ifndef _MAIN_H_ +#define _MAIN_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +#ifdef RTE_EXEC_ENV_BAREMETAL +#error "Baremetal is not supported" +#else +#define MAIN main +#endif + +#define RTE_LOGTYPE_APP RTE_LOGTYPE_USER1 + +/* + * Configurable number of RX/TX ring descriptors + */ +#define APP_RX_DESC_DEFAULT 128 +#define APP_TX_DESC_DEFAULT 256 + +#define MBUF_SIZE (1528 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM) +#define APP_RING_SIZE (8*1024) +#define NB_MBUF (64*1024*32) + +#define MAX_PKT_RX_BURST 64 +#define PKT_ENQUEUE 64 +#define PKT_DEQUEUE 32 +#define MAX_PKT_TX_BURST 64 + +#define RX_PTHRESH 8 /**< Default values of RX prefetch threshold reg. */ +#define RX_HTHRESH 8 /**< Default values of RX host threshold reg. */ +#define RX_WTHRESH 4 /**< Default values of RX write-back threshold reg. */ + +#define TX_PTHRESH 36 /**< Default values of TX prefetch threshold reg. */ +#define TX_HTHRESH 0 /**< Default values of TX host threshold reg. */ +#define TX_WTHRESH 0 /**< Default values of TX write-back threshold reg. */ + +#define BURST_TX_DRAIN_US 100 + +#define MAX_DATA_STREAMS (RTE_MAX_LCORE/2) +#define MAX_SCHED_SUBPORTS 8 +#define MAX_SCHED_PIPES 4096 + +#ifndef APP_COLLECT_STAT +#define APP_COLLECT_STAT 1 +#endif + +#if APP_COLLECT_STAT +#define APP_STATS_ADD(stat,val) (stat) += (val) +#else +#define APP_STATS_ADD(stat,val) do {(void) (val);} while (0) +#endif + +struct thread_stat +{ + uint64_t nb_rx; + uint64_t nb_drop; +}; + + +struct thread_conf +{ + uint32_t counter; + uint32_t n_mbufs; + struct rte_mbuf **m_table; + + uint8_t rx_port; + uint8_t tx_port; + uint16_t rx_queue; + uint16_t tx_queue; + struct rte_ring *rx_ring; + struct rte_ring *tx_ring; + struct rte_sched_port *sched_port; + +#if APP_COLLECT_STAT + struct thread_stat stat; +#endif +} __rte_cache_aligned; + + +struct flow_conf +{ + uint32_t rx_core; + uint32_t wt_core; + uint32_t tx_core; + uint8_t rx_port; + uint8_t tx_port; + uint16_t rx_queue; + uint16_t tx_queue; + struct rte_ring *rx_ring; + struct rte_ring *tx_ring; + struct rte_sched_port *sched_port; + struct rte_mempool *mbuf_pool; + + struct thread_conf rx_thread; + struct thread_conf wt_thread; + struct thread_conf tx_thread; +}; + + +struct ring_conf +{ + uint32_t rx_size; + uint32_t ring_size; + uint32_t tx_size; +}; + +struct burst_conf +{ + uint16_t rx_burst; + uint16_t ring_burst; + uint16_t qos_dequeue; + uint16_t tx_burst; +}; + +struct ring_thresh +{ + uint8_t pthresh; /**< Ring prefetch threshold. */ + uint8_t hthresh; /**< Ring host threshold. */ + uint8_t wthresh; /**< Ring writeback threshold. 
*/ +}; + +extern uint32_t nb_pfc; +extern const char *cfg_profile; +extern struct flow_conf qos_conf[]; +extern int app_pipe_to_profile[MAX_SCHED_SUBPORTS][MAX_SCHED_PIPES]; + +extern struct ring_conf ring_conf; +extern struct burst_conf burst_conf; +extern struct ring_thresh rx_thresh; +extern struct ring_thresh tx_thresh; + +extern struct rte_sched_port_params port_params; + +int MAIN(int argc, char **argv); +int app_parse_args(int argc, char **argv); +int app_init(void); + +void app_rx_thread(struct thread_conf **qconf); +void app_tx_thread(struct thread_conf **qconf); +void app_worker_thread(struct thread_conf **qconf); +void app_mixed_thread(struct thread_conf **qconf); + + +#ifdef __cplusplus +} +#endif + +#endif /* _MAIN_H_ */ diff --git a/examples/qos_sched/profile.cfg b/examples/qos_sched/profile.cfg new file mode 100644 index 0000000000..5caa996e3e --- /dev/null +++ b/examples/qos_sched/profile.cfg @@ -0,0 +1,109 @@ +; BSD LICENSE +; +; Copyright(c) 2010-2013 Intel Corporation. All rights reserved. +; All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +; + +; This file enables the following hierarchical scheduler configuration for each +; 10GbE output port: +; * Single subport (subport 0): +; - Subport rate set to 100% of port rate +; - Each of the 4 traffic classes has rate set to 100% of port rate +; * 4K pipes per subport 0 (pipes 0 .. 4095) with identical configuration: +; - Pipe rate set to 1/4K of port rate +; - Each of the 4 traffic classes has rate set to 100% of pipe rate +; - Within each traffic class, the byte-level WRR weights for the 4 queues +; are set to 1:1:1:1 +; +; For more details, please refer to chapter "Quality of Service (QoS) Framework" +; of Intel Data Plane Development Kit (Intel DPDK) Programmer's Guide. 
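+;
+; (Rate arithmetic, for reference: a 10 GbE port carries 10^10 bits/sec, i.e.
+; 10^10 / 8 = 1250000000 bytes/sec, which is the subport tb rate used below;
+; dividing it evenly across 4096 pipes gives 1250000000 / 4096 = ~305175
+; bytes/sec per pipe, the pipe profile tb rate.)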
+ +; Port configuration +[port] +frame overhead = 24 +number of subports per port = 1 +number of pipes per subport = 4096 +queue sizes = 64 64 64 64 + +; Subport configuration +[subport 0] +tb rate = 1250000000 ; Bytes per second +tb size = 1000000 ; Bytes + +tc 0 rate = 1250000000 ; Bytes per second +tc 1 rate = 1250000000 ; Bytes per second +tc 2 rate = 1250000000 ; Bytes per second +tc 3 rate = 1250000000 ; Bytes per second +tc period = 10 ; Milliseconds +tc oversubscription period = 10; Milliseconds + +pipe 0-4095 = 0 ; These pipes are configured with pipe profile 0 + +; Pipe configuration +[pipe profile 0] +tb rate = 305175 ; Bytes per second +tb size = 1000000 ; Bytes + +tc 0 rate = 305175 ; Bytes per second +tc 1 rate = 305175 ; Bytes per second +tc 2 rate = 305175 ; Bytes per second +tc 3 rate = 305175 ; Bytes per second +tc period = 40 ; Milliseconds + +tc 0 oversubscription weight = 1 +tc 1 oversubscription weight = 1 +tc 2 oversubscription weight = 1 +tc 3 oversubscription weight = 1 + +tc 0 wrr weights = 1 1 1 1 +tc 1 wrr weights = 1 1 1 1 +tc 2 wrr weights = 1 1 1 1 +tc 3 wrr weights = 1 1 1 1 + +; RED params per traffic class and color (Green / Yellow / Red) +[red] +tc 0 wred min = 48 40 32 +tc 0 wred max = 64 64 64 +tc 0 wred inv prob = 10 10 10 +tc 0 wred weight = 9 9 9 + +tc 1 wred min = 48 40 32 +tc 1 wred max = 64 64 64 +tc 1 wred inv prob = 10 10 10 +tc 1 wred weight = 9 9 9 + +tc 2 wred min = 48 40 32 +tc 2 wred max = 64 64 64 +tc 2 wred inv prob = 10 10 10 +tc 2 wred weight = 9 9 9 + +tc 3 wred min = 48 40 32 +tc 3 wred max = 64 64 64 +tc 3 wred inv prob = 10 10 10 +tc 3 wred weight = 9 9 9 diff --git a/lib/Makefile b/lib/Makefile index 122ba42f29..74162f2c55 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -48,6 +48,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_LPM) += librte_lpm DIRS-$(CONFIG_RTE_LIBRTE_NET) += librte_net DIRS-$(CONFIG_RTE_LIBRTE_POWER) += librte_power DIRS-$(CONFIG_RTE_LIBRTE_METER) += librte_meter +DIRS-$(CONFIG_RTE_LIBRTE_SCHED) += librte_sched DIRS-$(CONFIG_RTE_LIBRTE_PMAC) += librte_pmac ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y) diff --git a/lib/librte_eal/common/include/rte_log.h b/lib/librte_eal/common/include/rte_log.h index b0c735f171..bf4a3e2c76 100644 --- a/lib/librte_eal/common/include/rte_log.h +++ b/lib/librte_eal/common/include/rte_log.h @@ -74,6 +74,7 @@ extern struct rte_logs rte_logs; #define RTE_LOGTYPE_PMAC 0x00000200 /**< Log related to PMAC. */ #define RTE_LOGTYPE_POWER 0x00000400 /**< Log related to power. */ #define RTE_LOGTYPE_METER 0x00000800 /**< Log related to QoS meter. */ +#define RTE_LOGTYPE_SCHED 0x00001000 /**< Log related to QoS port scheduler. */ /* these log types can be used in an application */ #define RTE_LOGTYPE_USER1 0x01000000 /**< User-defined log type 1. */ diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h index 27988c3bf4..5d610cb3ea 100644 --- a/lib/librte_mbuf/rte_mbuf.h +++ b/lib/librte_mbuf/rte_mbuf.h @@ -158,6 +158,7 @@ struct rte_pktmbuf { uint16_t hash; uint16_t id; } fdir; /**< Filter identifier if FDIR enabled */ + uint32_t sched; /**< Hierarchical scheduler */ } hash; /**< hash information */ }; diff --git a/lib/librte_sched/Makefile b/lib/librte_sched/Makefile new file mode 100644 index 0000000000..5050db0e13 --- /dev/null +++ b/lib/librte_sched/Makefile @@ -0,0 +1,56 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2013 Intel Corporation. All rights reserved. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +include $(RTE_SDK)/mk/rte.vars.mk + +# +# library name +# +LIB = librte_sched.a + +CFLAGS += -O3 +CFLAGS += -g +CFLAGS += $(WERROR_FLAGS) + +# +# all source are stored in SRCS-y +# +SRCS-$(CONFIG_RTE_LIBRTE_SCHED) += rte_sched.c rte_red.c rte_approx.c + +# install includes +SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include := rte_sched.h rte_bitmap.h rte_sched_common.h rte_red.h rte_approx.h + +# this lib depends upon: +DEPDIRS-$(CONFIG_RTE_LIBRTE_SCHED) += lib/librte_mempool lib/librte_mbuf +DEPDIRS-$(CONFIG_RTE_LIBRTE_SCHED) += lib/librte_net lib/librte_timer + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_sched/rte_approx.c b/lib/librte_sched/rte_approx.c new file mode 100644 index 0000000000..c05e2a7826 --- /dev/null +++ b/lib/librte_sched/rte_approx.c @@ -0,0 +1,197 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2013 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include + +#include "rte_approx.h" + +/* + * Based on paper "Approximating Rational Numbers by Fractions" by Michal + * Forisek forisek@dcs.fmph.uniba.sk + * + * Given a rational number alpha with 0 < alpha < 1 and a precision d, the goal + * is to find positive integers p, q such that alpha - d < p/q < alpha + d, and + * q is minimal. + * + * http://people.ksp.sk/~misof/publications/2007approx.pdf + */ + +/* fraction comparison: compare (a/b) and (c/d) */ +static inline uint32_t +less(uint32_t a, uint32_t b, uint32_t c, uint32_t d) +{ + return (a*d < b*c); +} + +static inline uint32_t +less_or_equal(uint32_t a, uint32_t b, uint32_t c, uint32_t d) +{ + return (a*d <= b*c); +} + +/* check whether a/b is a valid approximation */ +static inline uint32_t +matches(uint32_t a, uint32_t b, + uint32_t alpha_num, uint32_t d_num, uint32_t denum) +{ + if (less_or_equal(a, b, alpha_num - d_num, denum)) + return 0; + + if (less(a ,b, alpha_num + d_num, denum)) + return 1; + + return 0; +} + +static inline void +find_exact_solution_left(uint32_t p_a, uint32_t q_a, uint32_t p_b, uint32_t q_b, + uint32_t alpha_num, uint32_t d_num, uint32_t denum, uint32_t *p, uint32_t *q) +{ + uint32_t k_num = denum * p_b - (alpha_num + d_num) * q_b; + uint32_t k_denum = (alpha_num + d_num) * q_a - denum * p_a; + uint32_t k = (k_num / k_denum) + 1; + + *p = p_b + k * p_a; + *q = q_b + k * q_a; +} + +static inline void +find_exact_solution_right(uint32_t p_a, uint32_t q_a, uint32_t p_b, uint32_t q_b, + uint32_t alpha_num, uint32_t d_num, uint32_t denum, uint32_t *p, uint32_t *q) +{ + uint32_t k_num = - denum * p_b + (alpha_num - d_num) * q_b; + uint32_t k_denum = - (alpha_num - d_num) * q_a + denum * p_a; + uint32_t k = (k_num / k_denum) + 1; + + *p = p_b + k * p_a; + *q = q_b + k * q_a; +} + +static int +find_best_rational_approximation(uint32_t alpha_num, uint32_t d_num, uint32_t denum, uint32_t *p, uint32_t *q) +{ + uint32_t p_a, q_a, p_b, q_b; + + /* check assumptions on the inputs */ + if (!((0 < d_num) && (d_num < alpha_num) && (alpha_num < denum) && (d_num + alpha_num < denum))) { + return -1; + } + + /* set initial bounds for the search */ + p_a = 0; + q_a = 1; + p_b = 1; + q_b = 1; + + while (1) { + uint32_t new_p_a, new_q_a, new_p_b, new_q_b; + uint32_t x_num, x_denum, x; + int aa, bb; + + /* compute the number of steps to the left */ + x_num = denum * p_b - alpha_num * q_b; + x_denum = - denum * p_a + alpha_num * q_a; + x = (x_num + x_denum - 1) / x_denum; /* x = ceil(x_num / x_denum) */ + + /* check whether we have a valid approximation */ + aa = matches(p_b + x * p_a, q_b + x * q_a, alpha_num, d_num, denum); + bb = matches(p_b + (x-1) * p_a, q_b + (x - 1) * q_a, alpha_num, d_num, denum); + if (aa || bb) { + find_exact_solution_left(p_a, q_a, p_b, q_b, alpha_num, d_num, denum, p, q); + return 0; + } + + /* update the interval */ + new_p_a = p_b + (x - 1) * p_a ; + new_q_a = q_b + (x - 1) * q_a; + new_p_b = p_b + x * p_a ; + new_q_b = q_b + x * q_a; + + p_a = new_p_a 
; + q_a = new_q_a; + p_b = new_p_b ; + q_b = new_q_b; + + /* compute the number of steps to the right */ + x_num = alpha_num * q_b - denum * p_b; + x_denum = - alpha_num * q_a + denum * p_a; + x = (x_num + x_denum - 1) / x_denum; /* x = ceil(x_num / x_denum) */ + + /* check whether we have a valid approximation */ + aa = matches(p_b + x * p_a, q_b + x * q_a, alpha_num, d_num, denum); + bb = matches(p_b + (x - 1) * p_a, q_b + (x - 1) * q_a, alpha_num, d_num, denum); + if (aa || bb) { + find_exact_solution_right(p_a, q_a, p_b, q_b, alpha_num, d_num, denum, p, q); + return 0; + } + + /* update the interval */ + new_p_a = p_b + (x - 1) * p_a; + new_q_a = q_b + (x - 1) * q_a; + new_p_b = p_b + x * p_a; + new_q_b = q_b + x * q_a; + + p_a = new_p_a; + q_a = new_q_a; + p_b = new_p_b; + q_b = new_q_b; + } +} + +int rte_approx(double alpha, double d, uint32_t *p, uint32_t *q) +{ + uint32_t alpha_num, d_num, denum; + + /* Check input arguments */ + if (!((0.0 < d) && (d < alpha) && (alpha < 1.0))) { + return -1; + } + + if ((p == NULL) || (q == NULL)) { + return -2; + } + + /* Compute alpha_num, d_num and denum */ + denum = 1; + while (d < 1) { + alpha *= 10; + d *= 10; + denum *= 10; + } + alpha_num = (uint32_t) alpha; + d_num = (uint32_t) d; + + /* Perform approximation */ + return find_best_rational_approximation(alpha_num, d_num, denum, p, q); +} diff --git a/lib/librte_sched/rte_approx.h b/lib/librte_sched/rte_approx.h new file mode 100644 index 0000000000..d755afa60e --- /dev/null +++ b/lib/librte_sched/rte_approx.h @@ -0,0 +1,76 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2013 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef __INCLUDE_RTE_APPROX_H__ +#define __INCLUDE_RTE_APPROX_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @file + * RTE Rational Approximation + * + * Given a rational number alpha with 0 < alpha < 1 and a precision d, the goal + * is to find positive integers p, q such that alpha - d < p/q < alpha + d, and + * q is minimal. 
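+ *
+ * Example: alpha = 0.33 with precision d = 0.1 accepts any fraction inside
+ * (0.23, 0.43); the smallest valid denominator yields p = 1, q = 3
+ * (1/3 = 0.333...).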
+ * + ***/ + +#include + +/** + * Find best rational approximation + * + * @param alpha + * Rational number to approximate + * @param d + * Precision for the rational approximation + * @param p + * Pointer to pre-allocated space where the numerator of the rational + * approximation will be stored when operation is successful + * @param q + * Pointer to pre-allocated space where the denominator of the rational + * approximation will be stored when operation is successful + * @return + * 0 upon success, error code otherwise + */ +int rte_approx(double alpha, double d, uint32_t *p, uint32_t *q); + +#ifdef __cplusplus +} +#endif + +#endif /* __INCLUDE_RTE_APPROX_H__ */ diff --git a/lib/librte_sched/rte_bitmap.h b/lib/librte_sched/rte_bitmap.h new file mode 100644 index 0000000000..c52db32a78 --- /dev/null +++ b/lib/librte_sched/rte_bitmap.h @@ -0,0 +1,505 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2013 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef __INCLUDE_RTE_BITMAP_H__ +#define __INCLUDE_RTE_BITMAP_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @file + * RTE Bitmap + * + * The bitmap component provides a mechanism to manage large arrays of bits + * through bit get/set/clear and bit array scan operations. + * + * The bitmap scan operation is optimized for 64-bit CPUs using 64-byte cache + * lines. The bitmap is hierarchically organized using two arrays (array1 and + * array2), with each bit in array1 being associated with a full cache line + * (512 bits) of bitmap bits, which are stored in array2: the bit in array1 is + * set only when there is at least one bit set within its associated array2 + * bits, otherwise the bit in array1 is cleared. The read and write operations + * for array1 and array2 are always done in slabs of 64 bits. + * + * This bitmap is not thread safe. 
For lock free operation on a specific bitmap + * instance, a single writer thread performing bit set/clear operations is + * allowed, only the writer thread can do bitmap scan operations, while there + * can be several reader threads performing bit get operations in parallel with + * the writer thread. When the use of locking primitives is acceptable, the + * serialization of the bit set/clear and bitmap scan operations needs to be + * enforced by the caller, while the bit get operation does not require locking + * the bitmap. + * + ***/ + +#include +#include +#include +#include + +#ifndef RTE_BITMAP_OPTIMIZATIONS +#define RTE_BITMAP_OPTIMIZATIONS 1 +#endif +#if RTE_BITMAP_OPTIMIZATIONS +#include +#endif + +/** Number of elements in array1. Each element in array1 is a 64-bit slab. */ +#ifndef RTE_BITMAP_ARRAY1_SIZE +#define RTE_BITMAP_ARRAY1_SIZE 16 +#endif + +/* Slab */ +#define RTE_BITMAP_SLAB_BIT_SIZE 64 +#define RTE_BITMAP_SLAB_BIT_SIZE_LOG2 6 +#define RTE_BITMAP_SLAB_BIT_MASK (RTE_BITMAP_SLAB_BIT_SIZE - 1) + +/* Cache line (CL) */ +#define RTE_BITMAP_CL_BIT_SIZE (CACHE_LINE_SIZE * 8) +#define RTE_BITMAP_CL_BIT_SIZE_LOG2 9 +#define RTE_BITMAP_CL_BIT_MASK (RTE_BITMAP_CL_BIT_SIZE - 1) + +#define RTE_BITMAP_CL_SLAB_SIZE (RTE_BITMAP_CL_BIT_SIZE / RTE_BITMAP_SLAB_BIT_SIZE) +#define RTE_BITMAP_CL_SLAB_SIZE_LOG2 3 +#define RTE_BITMAP_CL_SLAB_MASK (RTE_BITMAP_CL_SLAB_SIZE - 1) + +/** Bitmap data structure */ +struct rte_bitmap { + uint64_t array1[RTE_BITMAP_ARRAY1_SIZE]; /**< Bitmap array1 */ + uint64_t *array2; /**< Bitmap array2 */ + uint32_t array1_size; /**< Number of 64-bit slabs in array1 that are actually used */ + uint32_t array2_size; /**< Number of 64-bit slabs in array2 */ + + /* Context for the "scan next" operation */ + uint32_t index1; /**< Bitmap scan: Index of current array1 slab */ + uint32_t offset1; /**< Bitmap scan: Offset of current bit within current array1 slab */ + uint32_t index2; /**< Bitmap scan: Index of current array2 slab */ + uint32_t go2; /**< Bitmap scan: Go/stop condition for current array2 cache line */ +} __rte_cache_aligned; + +static inline void +__rte_bitmap_index1_inc(struct rte_bitmap *bmp) +{ + bmp->index1 = (bmp->index1 + 1) & (RTE_BITMAP_ARRAY1_SIZE - 1); +} + +static inline uint64_t +__rte_bitmap_mask1_get(struct rte_bitmap *bmp) +{ + return ((~1lu) << bmp->offset1); +} + +static inline void +__rte_bitmap_index2_set(struct rte_bitmap *bmp) +{ + bmp->index2 = (((bmp->index1 << RTE_BITMAP_SLAB_BIT_SIZE_LOG2) + bmp->offset1) << RTE_BITMAP_CL_SLAB_SIZE_LOG2); +} + +#if RTE_BITMAP_OPTIMIZATIONS + +static inline int +rte_bsf64(uint64_t slab, uint32_t *pos) +{ + if (likely(slab == 0)) { + return 0; + } + + *pos = __builtin_ctzll(slab); + return 1; +} + +#else + +static inline int +rte_bsf64(uint64_t slab, uint32_t *pos) +{ + uint64_t mask; + uint32_t i; + + if (likely(slab == 0)) { + return 0; + } + + for (i = 0, mask = 1; i < RTE_BITMAP_SLAB_BIT_SIZE; i ++, mask <<= 1) { + if (unlikely(slab & mask)) { + *pos = i; + return 1; + } + } + + return 0; +} + +#endif + +static inline void +__rte_bitmap_scan_init(struct rte_bitmap *bmp) +{ + bmp->index1 = RTE_BITMAP_ARRAY1_SIZE - 1; + bmp->offset1 = RTE_BITMAP_SLAB_BIT_SIZE - 1; + __rte_bitmap_index2_set(bmp); + bmp->index2 += RTE_BITMAP_CL_SLAB_SIZE; + + bmp->go2 = 0; +} + +/** + * Bitmap initialization + * + * @param bmp + * Handle to bitmap instance + * @param array2 + * Base address of pre-allocated array2 + * @param n_bits + * Number of pre-allocated bits in array2. Must be non-zero and multiple of 512. 
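+ *   (Sizing note: array2 occupies n_bits / 8 bytes and must be cache line
+ *   aligned; with the default RTE_BITMAP_ARRAY1_SIZE of 16, at most
+ *   16 * 64 * 512 = 524288 bits can be managed.)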
+ * @return + * 0 upon success, error code otherwise + */ +static inline int +rte_bitmap_init(struct rte_bitmap *bmp, uint8_t *array2, uint32_t n_bits) +{ + uint32_t array1_size, array2_size; + + /* Check input arguments */ + if ((bmp == NULL) || + (array2 == NULL) || (((uintptr_t) array2) & CACHE_LINE_MASK) || + (n_bits == 0) || (n_bits & RTE_BITMAP_CL_BIT_MASK)){ + return -1; + } + + array2_size = n_bits / RTE_BITMAP_SLAB_BIT_SIZE; + array1_size = ((n_bits / RTE_BITMAP_CL_BIT_SIZE) + (RTE_BITMAP_SLAB_BIT_SIZE - 1)) / RTE_BITMAP_SLAB_BIT_SIZE; + if (array1_size > RTE_BITMAP_ARRAY1_SIZE){ + return -1; + } + + /* Setup bitmap */ + memset(bmp, 0, sizeof(struct rte_bitmap)); + bmp->array2 = (uint64_t *) array2; + bmp->array1_size = array1_size; + bmp->array2_size = array2_size; + __rte_bitmap_scan_init(bmp); + + return 0; +} + +/** + * Bitmap free + * + * @param bmp + * Handle to bitmap instance + * @return + * 0 upon success, error code otherwise + */ +static inline int +rte_bitmap_free(struct rte_bitmap *bmp) +{ + /* Check input arguments */ + if (bmp == NULL) { + return -1; + } + + return 0; +} + +/** + * Bitmap reset + * + * @param bmp + * Handle to bitmap instance + */ +static inline void +rte_bitmap_reset(struct rte_bitmap *bmp) +{ + memset(bmp->array1, 0, sizeof(bmp->array1)); + memset(bmp->array2, 0, bmp->array2_size * sizeof(uint64_t)); + __rte_bitmap_scan_init(bmp); +} + +/** + * Bitmap location prefetch into CPU L1 cache + * + * @param bmp + * Handle to bitmap instance + * @param pos + * Bit position + * @return + * 0 upon success, error code otherwise + */ +static inline void +rte_bitmap_prefetch0(struct rte_bitmap *bmp, uint32_t pos) +{ + uint64_t *slab2; + uint32_t index2; + + index2 = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2; + slab2 = bmp->array2 + index2; + rte_prefetch0((void *) slab2); +} + +/** + * Bitmap bit get + * + * @param bmp + * Handle to bitmap instance + * @param pos + * Bit position + * @return + * 0 when bit is cleared, non-zero when bit is set + */ +static inline uint64_t +rte_bitmap_get(struct rte_bitmap *bmp, uint32_t pos) +{ + uint64_t *slab2; + uint32_t index2, offset2; + + index2 = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2; + offset2 = pos & RTE_BITMAP_SLAB_BIT_MASK; + slab2 = bmp->array2 + index2; + return ((*slab2) & (1lu << offset2)); +} + +/** + * Bitmap bit set + * + * @param bmp + * Handle to bitmap instance + * @param pos + * Bit position + */ +static inline void +rte_bitmap_set(struct rte_bitmap *bmp, uint32_t pos) +{ + uint64_t *slab1, *slab2; + uint32_t index1, index2, offset1, offset2; + + /* Set bit in array2 slab and set bit in array1 slab */ + index2 = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2; + offset2 = pos & RTE_BITMAP_SLAB_BIT_MASK; + index1 = pos >> (RTE_BITMAP_SLAB_BIT_SIZE_LOG2 + RTE_BITMAP_CL_BIT_SIZE_LOG2); + offset1 = (pos >> RTE_BITMAP_CL_BIT_SIZE_LOG2) & RTE_BITMAP_SLAB_BIT_MASK; + slab2 = bmp->array2 + index2; + slab1 = bmp->array1 + index1; + + *slab2 |= 1lu << offset2; + *slab1 |= 1lu << offset1; +} + +/** + * Bitmap slab set + * + * @param bmp + * Handle to bitmap instance + * @param pos + * Bit position identifying the array2 slab + * @param slab + * Value to be assigned to the 64-bit slab in array2 + */ +static inline void +rte_bitmap_set_slab(struct rte_bitmap *bmp, uint32_t pos, uint64_t slab) +{ + uint64_t *slab1, *slab2; + uint32_t index1, index2, offset1; + + /* Set bits in array2 slab and set bit in array1 slab */ + index2 = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2; + index1 = pos >> (RTE_BITMAP_SLAB_BIT_SIZE_LOG2 + 
RTE_BITMAP_CL_BIT_SIZE_LOG2); + offset1 = (pos >> RTE_BITMAP_CL_BIT_SIZE_LOG2) & RTE_BITMAP_SLAB_BIT_MASK; + slab2 = bmp->array2 + index2; + slab1 = bmp->array1 + index1; + + *slab2 |= slab; + *slab1 |= 1lu << offset1; +} + +static inline uint64_t +__rte_bitmap_line_not_empty(uint64_t *slab2) +{ + uint64_t v1, v2, v3, v4; + + v1 = slab2[0] | slab2[1]; + v2 = slab2[2] | slab2[3]; + v3 = slab2[4] | slab2[5]; + v4 = slab2[6] | slab2[7]; + v1 |= v2; + v3 |= v4; + + return (v1 | v3); +} + +/** + * Bitmap bit clear + * + * @param bmp + * Handle to bitmap instance + * @param pos + * Bit position + */ +static inline void +rte_bitmap_clear(struct rte_bitmap *bmp, uint32_t pos) +{ + uint64_t *slab1, *slab2; + uint32_t index1, index2, offset1, offset2; + + /* Clear bit in array2 slab */ + index2 = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2; + offset2 = pos & RTE_BITMAP_SLAB_BIT_MASK; + slab2 = bmp->array2 + index2; + + /* Return if array2 slab is not all-zeros */ + *slab2 &= ~(1lu << offset2); + if (*slab2){ + return; + } + + /* Check the entire cache line of array2 for all-zeros */ + index2 &= ~ RTE_BITMAP_CL_SLAB_MASK; + slab2 = bmp->array2 + index2; + if (__rte_bitmap_line_not_empty(slab2)) { + return; + } + + /* The array2 cache line is all-zeros, so clear bit in array1 slab */ + index1 = pos >> (RTE_BITMAP_SLAB_BIT_SIZE_LOG2 + RTE_BITMAP_CL_BIT_SIZE_LOG2); + offset1 = (pos >> RTE_BITMAP_CL_BIT_SIZE_LOG2) & RTE_BITMAP_SLAB_BIT_MASK; + slab1 = bmp->array1 + index1; + *slab1 &= ~(1lu << offset1); + + return; +} + +static inline int +__rte_bitmap_scan_search(struct rte_bitmap *bmp) +{ + uint64_t value1; + uint32_t i; + + /* Check current array1 slab */ + value1 = bmp->array1[bmp->index1]; + value1 &= __rte_bitmap_mask1_get(bmp); + + if (rte_bsf64(value1, &bmp->offset1)) { + return 1; + } + + __rte_bitmap_index1_inc(bmp); + bmp->offset1 = 0; + + /* Look for another array1 slab */ + for (i = 0; i < RTE_BITMAP_ARRAY1_SIZE; i ++, __rte_bitmap_index1_inc(bmp)) { + value1 = bmp->array1[bmp->index1]; + + if (rte_bsf64(value1, &bmp->offset1)) { + return 1; + } + } + + return 0; +} + +static inline void +__rte_bitmap_scan_read_init(struct rte_bitmap *bmp) +{ + __rte_bitmap_index2_set(bmp); + bmp->go2 = 1; + rte_prefetch1((void *)(bmp->array2 + bmp->index2 + 8)); +} + +static inline int +__rte_bitmap_scan_read(struct rte_bitmap *bmp, uint32_t *pos, uint64_t *slab) +{ + uint64_t *slab2; + + slab2 = bmp->array2 + bmp->index2; + for ( ; bmp->go2 ; bmp->index2 ++, slab2 ++, bmp->go2 = bmp->index2 & RTE_BITMAP_CL_SLAB_MASK) { + if (*slab2) { + *pos = bmp->index2 << RTE_BITMAP_SLAB_BIT_SIZE_LOG2; + *slab = *slab2; + + bmp->index2 ++; + slab2 ++; + bmp->go2 = bmp->index2 & RTE_BITMAP_CL_SLAB_MASK; + return 1; + } + } + + return 0; +} + +/** + * Bitmap scan (with automatic wrap-around) + * + * @param bmp + * Handle to bitmap instance + * @param pos + * When function call returns 1, pos contains the position of the next set + * bit, otherwise not modified + * @param slab + * When function call returns 1, slab contains the value of the entire 64-bit + * slab where the bit indicated by pos is located. Slabs are always 64-bit + * aligned, so the position of the first bit of the slab (this bit is not + * necessarily set) is pos / 64. Once a slab has been returned by the bitmap + * scan operation, the internal pointers of the bitmap are updated to point + * after this slab, so the same slab will not be returned again if it + * contains more than one bit which is set. When function call returns 0, + * slab is not modified. 
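+ *   A minimal consumer sketch (handle_bit() is a hypothetical callback):
+ *   while (rte_bitmap_scan(bmp, &pos, &slab)) { uint32_t i; while
+ *   (rte_bsf64(slab, &i)) { handle_bit(pos + i); slab &= slab - 1; } }
+ *   Since the scan wraps around, the caller must supply its own
+ *   termination condition.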
+ * @return + * 0 if there is no bit set in the bitmap, 1 otherwise + */ +static inline int +rte_bitmap_scan(struct rte_bitmap *bmp, uint32_t *pos, uint64_t *slab) +{ + /* Return data from current array2 line if available */ + if (__rte_bitmap_scan_read(bmp, pos, slab)) { + return 1; + } + + /* Look for non-empty array2 line */ + if (__rte_bitmap_scan_search(bmp)) { + __rte_bitmap_scan_read_init(bmp); + __rte_bitmap_scan_read(bmp, pos, slab); + return 1; + } + + /* Empty bitmap */ + return 0; +} + +#ifdef __cplusplus +} +#endif + +#endif /* __INCLUDE_RTE_BITMAP_H__ */ diff --git a/lib/librte_sched/rte_red.c b/lib/librte_sched/rte_red.c new file mode 100644 index 0000000000..0eaf5a03ab --- /dev/null +++ b/lib/librte_sched/rte_red.c @@ -0,0 +1,160 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2013 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include +#include "rte_red.h" +#include + +#ifdef __INTEL_COMPILER +#pragma warning(disable:2259) /* conversion may lose significant bits */ +#endif + +#define DIM(x) (sizeof(x)/sizeof(x[0])) + +static int rte_red_init_done = 0; /**< Flag to indicate that global initialisation is done */ +uint32_t rte_red_rand_val = 0; /**< Random value cache */ +uint32_t rte_red_rand_seed = 0; /**< Seed for random number generation */ + +/** + * table[i] = log2(1-Wq) * Scale * -1 + * Wq = 1/(2^i) + */ +uint16_t rte_red_log2_1_minus_Wq[RTE_RED_WQ_LOG2_NUM]; + +/** + * table[i] = 2^(i/16) * Scale + */ +uint16_t rte_red_pow2_frac_inv[16]; + +/** + * @brief Initialize tables used to compute average + * queue size when queue is empty. 
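+ *
+ * (Precisely, as computed below: rte_red_pow2_frac_inv[i] stores
+ * round(2^RTE_RED_SCALING / 2^(i/16)), and rte_red_log2_1_minus_Wq[j]
+ * stores round(-1024 * log2(1 - 2^-(j + RTE_RED_WQ_LOG2_MIN))).)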
+ */ +static void +__rte_red_init_tables(void) +{ + uint32_t i = 0; + double scale = 0.0; + double table_size = 0.0; + + scale = (double)(1 << RTE_RED_SCALING); + table_size = (double)(DIM(rte_red_pow2_frac_inv)); + + for (i = 0; i < DIM(rte_red_pow2_frac_inv); i++) { + double m = (double)i; + + rte_red_pow2_frac_inv[i] = (uint16_t) round(scale / pow(2, m / table_size)); + } + + scale = 1024.0; + + RTE_RED_ASSERT(RTE_RED_WQ_LOG2_NUM == DIM(rte_red_log2_1_minus_Wq)); + + for (i = RTE_RED_WQ_LOG2_MIN; i <= RTE_RED_WQ_LOG2_MAX; i++) { + double n = (double)i; + double Wq = pow(2, -n); + uint32_t index = i - RTE_RED_WQ_LOG2_MIN; + + rte_red_log2_1_minus_Wq[index] = (uint16_t) round(-1.0 * scale * log2(1.0 - Wq)); + /** + * Table entry of zero, corresponds to a Wq of zero + * which is not valid (avg would remain constant no + * matter how long the queue is empty). So we have + * to check for zero and round up to one. + */ + if (rte_red_log2_1_minus_Wq[index] == 0) { + rte_red_log2_1_minus_Wq[index] = 1; + } + } +} + +int +rte_red_rt_data_init(struct rte_red *red) +{ + if (red == NULL) + return -1; + + red->avg = 0; + red->count = 0; + red->q_time = 0; + return 0; +} + +int +rte_red_config_init(struct rte_red_config *red_cfg, + const uint16_t wq_log2, + const uint16_t min_th, + const uint16_t max_th, + const uint16_t maxp_inv) +{ + if (red_cfg == NULL) { + return -1; + } + if (max_th > RTE_RED_MAX_TH_MAX) { + return -2; + } + if (min_th >= max_th) { + return -3; + } + if (wq_log2 > RTE_RED_WQ_LOG2_MAX) { + return -4; + } + if (wq_log2 < RTE_RED_WQ_LOG2_MIN) { + return -5; + } + if (maxp_inv < RTE_RED_MAXP_INV_MIN) { + return -6; + } + if (maxp_inv > RTE_RED_MAXP_INV_MAX) { + return -7; + } + + /** + * Initialize the RED module if not already done + */ + if (!rte_red_init_done) { + rte_red_rand_seed = rte_rand(); + rte_red_rand_val = rte_fast_rand(); + __rte_red_init_tables(); + rte_red_init_done = 1; + } + + red_cfg->min_th = ((uint32_t) min_th) << (wq_log2 + RTE_RED_SCALING); + red_cfg->max_th = ((uint32_t) max_th) << (wq_log2 + RTE_RED_SCALING); + red_cfg->pa_const = (2 * (max_th - min_th) * maxp_inv) << RTE_RED_SCALING; + red_cfg->maxp_inv = maxp_inv; + red_cfg->wq_log2 = wq_log2; + + return 0; +} diff --git a/lib/librte_sched/rte_red.h b/lib/librte_sched/rte_red.h new file mode 100644 index 0000000000..debe556f4c --- /dev/null +++ b/lib/librte_sched/rte_red.h @@ -0,0 +1,454 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2013 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef __RTE_RED_H_INCLUDED__ +#define __RTE_RED_H_INCLUDED__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @file + * RTE Random Early Detection (RED) + * + * + ***/ + +#include +#include +#include +#include +#include +#include + +#define RTE_RED_SCALING 10 /**< Fraction size for fixed-point */ +#define RTE_RED_S (1 << 22) /**< Packet size multiplied by number of leaf queues */ +#define RTE_RED_MAX_TH_MAX 1023 /**< Max threshold limit in fixed point format */ +#define RTE_RED_WQ_LOG2_MIN 1 /**< Min inverse filter weight value */ +#define RTE_RED_WQ_LOG2_MAX 12 /**< Max inverse filter weight value */ +#define RTE_RED_MAXP_INV_MIN 1 /**< Min inverse mark probability value */ +#define RTE_RED_MAXP_INV_MAX 255 /**< Max inverse mark probability value */ +#define RTE_RED_2POW16 (1<<16) /**< 2 power 16 */ +#define RTE_RED_INT16_NBITS (sizeof(uint16_t) * CHAR_BIT) +#define RTE_RED_WQ_LOG2_NUM (RTE_RED_WQ_LOG2_MAX - RTE_RED_WQ_LOG2_MIN + 1) + +#ifdef RTE_RED_DEBUG + +#define RTE_RED_ASSERT(exp) \ +if (!(exp)) { \ + rte_panic("line%d\tassert \"" #exp "\" failed\n", __LINE__); \ +} + +#else + +#define RTE_RED_ASSERT(exp) do { } while(0) + +#endif /* RTE_RED_DEBUG */ + +/** + * Externs + * + */ +extern uint32_t rte_red_rand_val; +extern uint32_t rte_red_rand_seed; +extern uint16_t rte_red_log2_1_minus_Wq[RTE_RED_WQ_LOG2_NUM]; +extern uint16_t rte_red_pow2_frac_inv[16]; + +/** + * RED configuration parameters passed by user + * + */ +struct rte_red_params { + uint16_t min_th; /**< Minimum threshold for queue (max_th) */ + uint16_t max_th; /**< Maximum threshold for queue (max_th) */ + uint16_t maxp_inv; /**< Inverse of packet marking probability maximum value (maxp = 1 / maxp_inv) */ + uint16_t wq_log2; /**< Negated log2 of queue weight (wq = 1 / (2 ^ wq_log2)) */ +}; + +/** + * RED configuration parameters + */ +struct rte_red_config { + uint32_t min_th; /**< min_th scaled in fixed-point format */ + uint32_t max_th; /**< max_th scaled in fixed-point format */ + uint32_t pa_const; /**< Precomputed constant value used for pa calculation (scaled in fixed-point format) */ + uint8_t maxp_inv; /**< maxp_inv */ + uint8_t wq_log2; /**< wq_log2 */ +}; + +/** + * RED run-time data + */ +struct rte_red { + uint32_t avg; /**< Average queue size (avg), scaled in fixed-point format */ + uint32_t count; /**< Number of packets since last marked packet (count) */ + uint64_t q_time; /**< Start of the queue idle time (q_time) */ +}; + +/** + * @brief Initialises run-time data + * + * @param [in,out] data pointer to RED runtime data + * + * @return Operation status + * @retval 0 success + * @retval !0 error + */ +int +rte_red_rt_data_init(struct rte_red *red); + +/** + * @brief Configures a single RED configuration parameter structure. 
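+ *
+ * (For instance, the qos_sched sample profile configures each traffic class
+ * with wq_log2 = 9, max_th = 64, maxp_inv = 10, and min_th = 48/40/32 for
+ * the green/yellow/red colors.)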
+ * + * @param [in,out] config pointer to a RED configuration parameter structure + * @param [in] wq_log2 log2 of the filter weight, valid range is: + * RTE_RED_WQ_LOG2_MIN <= wq_log2 <= RTE_RED_WQ_LOG2_MAX + * @param [in] min_th queue minimum threshold in number of packets + * @param [in] max_th queue maximum threshold in number of packets + * @param [in] maxp_inv inverse maximum mark probability + * + * @return Operation status + * @retval 0 success + * @retval !0 error + */ +int +rte_red_config_init(struct rte_red_config *red_cfg, + const uint16_t wq_log2, + const uint16_t min_th, + const uint16_t max_th, + const uint16_t maxp_inv); + +/** + * @brief Generate random number for RED + * + * Implementation based on: + * http://software.intel.com/en-us/articles/fast-random-number-generator-on-the-intel-pentiumr-4-processor/ + * + * 10 bit shift has been found through empirical tests (was 16). + * + * @return Random number between 0 and (2^22 - 1) + */ +static inline uint32_t +rte_fast_rand(void) +{ + rte_red_rand_seed = (214013 * rte_red_rand_seed) + 2531011; + return (rte_red_rand_seed >> 10); +} + +/** + * @brief Calculate factor to scale average queue size when queue + * becomes empty + * + * @param [in] wq_log2 log2 of EWMA filter weight, where wq = 1/(2 ^ wq_log2) + * @param [in] m exponent in the computed value (1 - wq) ^ m + * + * @return computed value + * @retval ((1 - wq) ^ m) scaled in fixed-point format + */ +static inline uint16_t +__rte_red_calc_qempty_factor(uint8_t wq_log2, uint16_t m) +{ + uint32_t n = 0; + uint32_t f = 0; + + /** + * Basic math tells us that: + * a^b = 2^(b * log2(a)) + * + * in our case: + * a = (1 - Wq) + * b = m + * Wq = 1/(2^wq_log2) + * + * So we are computing this equation: + * factor = 2 ^ (m * log2(1 - Wq)) + * + * First we are computing: + * n = m * log2(1 - Wq) + * + * To avoid dealing with signed numbers, the log2 table values are stored + * as positive although they should be negative because (1 - Wq) is + * always < 1. Contents of log2 table values are also scaled for precision. + */ + + n = m * rte_red_log2_1_minus_Wq[wq_log2 - RTE_RED_WQ_LOG2_MIN]; + + /** + * The tricky part is computing 2^n; for this we split n into + * an integer part and a fraction part. + * f - is the fraction part of n + * n - is the integer part of the original n + * + * Now using basic math we compute 2^n: + * 2^(n+f) = 2^n * 2^f + * 2^f - we use a lookup table + * 2^n - can be replaced with bit shift right operations + */ + + f = (n >> 6) & 0xf; + n >>= 10; + + if (n < RTE_RED_SCALING) + return (uint16_t) ((rte_red_pow2_frac_inv[f] + (1 << (n - 1))) >> n); + + return 0; +} + +/** + * @brief Updates queue average when the queue is empty + * + * Note: the packet is never dropped in this particular case. 
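+ * While the queue stays empty, avg decays approximately as
+ * avg = avg * (1 - wq)^m, with m estimated from the idle time as
+ * described below.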
+ * + * @param [in] config pointer to a RED configuration parameter structure + * @param [in,out] data pointer to RED runtime data + * @param [in] time current time stamp + * + * @return Operation status + * @retval 0 enqueue the packet + * @retval 1 drop the packet based on max threshold criterion + * @retval 2 drop the packet based on mark probability criterion + */ +static inline int +rte_red_enqueue_empty(const struct rte_red_config *red_cfg, + struct rte_red *red, + const uint64_t time) +{ + uint64_t time_diff = 0, m = 0; + + RTE_RED_ASSERT(red_cfg != NULL); + RTE_RED_ASSERT(red != NULL); + + red->count ++; + + /** + * We compute avg but we don't compare avg against + * min_th or max_th, nor calculate drop probability + */ + time_diff = time - red->q_time; + + /** + * m is the number of packets that might have arrived while the queue was empty. + * In this case we have time stamps provided by scheduler in byte units (bytes + * transmitted on network port). Such time stamp translates into time units as + * port speed is fixed but such approach simplifies the code. + */ + m = time_diff / RTE_RED_S; + + /** + * Check that m will fit into 16-bit unsigned integer + */ + if (m >= RTE_RED_2POW16) { + red->avg = 0; + } else { + red->avg = (red->avg >> RTE_RED_SCALING) * __rte_red_calc_qempty_factor(red_cfg->wq_log2, (uint16_t) m); + } + + return 0; +} + +/** + * Drop probability (Sally Floyd and Van Jacobson): + * + * pb = (1 / maxp_inv) * (avg - min_th) / (max_th - min_th) + * pa = pb / (2 - count * pb) + * + * + * (1 / maxp_inv) * (avg - min_th) + * --------------------------------- + * max_th - min_th + * pa = ----------------------------------------------- + * count * (1 / maxp_inv) * (avg - min_th) + * 2 - ----------------------------------------- + * max_th - min_th + * + * + * avg - min_th + * pa = ----------------------------------------------------------- + * 2 * (max_th - min_th) * maxp_inv - count * (avg - min_th) + * + * + * We define pa_const as: pa_const = 2 * (max_th - min_th) * maxp_inv. 
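+ * (For example, with the sample profile's green parameters min_th = 48,
+ * max_th = 64, maxp_inv = 10: pa_const = 2 * (64 - 48) * 10 = 320, stored
+ * left-shifted by RTE_RED_SCALING by rte_red_config_init().)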
+ * Then:
+ *
+ *
+ *                   avg - min_th
+ *   pa = -----------------------------------
+ *         pa_const - count * (avg - min_th)
+ */
+
+/**
+ * @brief make a decision to drop or enqueue a packet based on mark probability
+ *        criteria
+ *
+ * @param [in] red_cfg pointer to structure defining RED parameters
+ * @param [in,out] red pointer to RED runtime data
+ *
+ * @return operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet
+ */
+static inline int
+__rte_red_drop(const struct rte_red_config *red_cfg, struct rte_red *red)
+{
+	uint32_t pa_num = 0;	/* numerator of drop-probability */
+	uint32_t pa_den = 0;	/* denominator of drop-probability */
+	uint32_t pa_num_count = 0;
+
+	pa_num = (red->avg - red_cfg->min_th) >> (red_cfg->wq_log2);
+
+	pa_num_count = red->count * pa_num;
+
+	if (red_cfg->pa_const <= pa_num_count)
+		return 1;
+
+	pa_den = red_cfg->pa_const - pa_num_count;
+
+	/* If drop, generate and save random number to be used next time */
+	if (unlikely((rte_red_rand_val % pa_den) < pa_num)) {
+		rte_red_rand_val = rte_fast_rand();
+
+		return 1;
+	}
+
+	/* No drop */
+	return 0;
+}
+
+/**
+ * @brief Decides if new packet should be enqueued or dropped in queue non-empty case
+ *
+ * @param [in] red_cfg pointer to a RED configuration parameter structure
+ * @param [in,out] red pointer to RED runtime data
+ * @param [in] q current queue size (measured in packets)
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet based on max threshold criterion
+ * @retval 2 drop the packet based on mark probability criterion
+ */
+static inline int
+rte_red_enqueue_nonempty(const struct rte_red_config *red_cfg,
+	struct rte_red *red,
+	const unsigned q)
+{
+	RTE_RED_ASSERT(red_cfg != NULL);
+	RTE_RED_ASSERT(red != NULL);
+
+	/**
+	 * EWMA filter (Sally Floyd and Van Jacobson):
+	 *   avg = (1 - wq) * avg + wq * q
+	 *   avg = avg + q * wq - avg * wq
+	 *
+	 * We select: wq = 2^(-n). Let scaled version of avg be: avg_s = avg * 2^(N+n). We get:
+	 *   avg_s = avg_s + q * 2^N - avg_s * 2^(-n)
+	 *
+	 * By using shift left/right operations, we get:
+	 *   avg_s = avg_s + (q << N) - (avg_s >> n)
+	 *   avg_s += (q << N) - (avg_s >> n)
+	 */
+
+	/* avg update */
+	red->avg += (q << RTE_RED_SCALING) - (red->avg >> red_cfg->wq_log2);
+
+	/* avg < min_th: do not mark the packet */
+	if (red->avg < red_cfg->min_th) {
+		red->count ++;
+		return 0;
+	}
+
+	/* min_th <= avg < max_th: mark the packet with pa probability */
+	if (red->avg < red_cfg->max_th) {
+		if (!__rte_red_drop(red_cfg, red)) {
+			red->count ++;
+			return 0;
+		}
+
+		red->count = 0;
+		return 2;
+	}
+
+	/* max_th <= avg: always mark the packet */
+	red->count = 0;
+	return 1;
+}
+
+/**
+ * @brief Decides if new packet should be enqueued or dropped.
+ * Updates run time data based on the new queue size value.
+ * Based on the new queue average and the RED configuration parameters,
+ * gives a verdict whether to enqueue or drop the packet.
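+ *
+ * Illustrative usage sketch (not part of the original header; the caller-side
+ * names pkt, q_len, now, enqueue_pkt and drop_pkt are hypothetical):
+ *
+ *   if (rte_red_enqueue(&red_cfg, &red, q_len, now) == 0)
+ *       enqueue_pkt(pkt);   // accept the packet
+ *   else
+ *       drop_pkt(pkt);      // RED drop (return value 1 or 2)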
+ *
+ * @param [in] red_cfg pointer to a RED configuration parameter structure
+ * @param [in,out] red pointer to RED runtime data
+ * @param [in] q updated queue size in packets
+ * @param [in] time current time stamp
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet based on max threshold criterion
+ * @retval 2 drop the packet based on mark probability criterion
+ */
+static inline int
+rte_red_enqueue(const struct rte_red_config *red_cfg,
+	struct rte_red *red,
+	const unsigned q,
+	const uint64_t time)
+{
+	RTE_RED_ASSERT(red_cfg != NULL);
+	RTE_RED_ASSERT(red != NULL);
+
+	if (q != 0) {
+		return rte_red_enqueue_nonempty(red_cfg, red, q);
+	} else {
+		return rte_red_enqueue_empty(red_cfg, red, time);
+	}
+}
+
+/**
+ * @brief Callback to record the time that the queue became empty
+ *
+ * @param [in,out] red pointer to RED runtime data
+ * @param [in] time current time stamp
+ */
+static inline void
+rte_red_mark_queue_empty(struct rte_red *red, const uint64_t time)
+{
+	red->q_time = time;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __RTE_RED_H_INCLUDED__ */
diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c
new file mode 100644
index 0000000000..daa1a0d9e3
--- /dev/null
+++ b/lib/librte_sched/rte_sched.c
@@ -0,0 +1,2129 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_memory.h>
+#include <rte_malloc.h>
+#include <rte_cycles.h>
+#include <rte_prefetch.h>
+#include <rte_branch_prediction.h>
+#include <rte_mbuf.h>
+
+#include "rte_sched.h"
+#include "rte_bitmap.h"
+#include "rte_sched_common.h"
+#include "rte_approx.h"
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:2259) /* conversion may lose significant bits */
+#endif
+
+#ifndef RTE_SCHED_DEBUG
+#define RTE_SCHED_DEBUG 0
+#endif
+
+#ifndef RTE_SCHED_OPTIMIZATIONS
+#define RTE_SCHED_OPTIMIZATIONS 0
+#endif
+
+#if RTE_SCHED_OPTIMIZATIONS
+#include <immintrin.h>
+#endif
+
+#define RTE_SCHED_ENQUEUE 1
+
+#define RTE_SCHED_TS 1
+
+#if RTE_SCHED_TS == 0 /* Infinite credits. Traffic shaping disabled. */
+#define RTE_SCHED_TS_CREDITS_UPDATE 0
+#define RTE_SCHED_TS_CREDITS_CHECK 0
+#else                 /* Real Credits. Full traffic shaping implemented. */
+#define RTE_SCHED_TS_CREDITS_UPDATE 1
+#define RTE_SCHED_TS_CREDITS_CHECK 1
+#endif
+
+#ifndef RTE_SCHED_TB_RATE_CONFIG_ERR
+#define RTE_SCHED_TB_RATE_CONFIG_ERR (1e-7)
+#endif
+
+#define RTE_SCHED_WRR 1
+
+#ifndef RTE_SCHED_WRR_SHIFT
+#define RTE_SCHED_WRR_SHIFT 3
+#endif
+
+#ifndef RTE_SCHED_PORT_N_GRINDERS
+#define RTE_SCHED_PORT_N_GRINDERS 8
+#endif
+#if (RTE_SCHED_PORT_N_GRINDERS == 0) || (RTE_SCHED_PORT_N_GRINDERS & (RTE_SCHED_PORT_N_GRINDERS - 1))
+#error Number of grinders must be non-zero and a power of 2
+#endif
+#if (RTE_SCHED_OPTIMIZATIONS && (RTE_SCHED_PORT_N_GRINDERS != 8))
+#error Number of grinders must be 8 when RTE_SCHED_OPTIMIZATIONS is set
+#endif
+
+#define RTE_SCHED_GRINDER_PCACHE_SIZE (64 / RTE_SCHED_QUEUES_PER_PIPE)
+
+#define RTE_SCHED_PIPE_INVALID UINT32_MAX
+
+#define RTE_SCHED_BMP_POS_INVALID UINT32_MAX
+
+struct rte_sched_subport {
+	/* Token bucket (TB) */
+	uint64_t tb_time; /* time of last update */
+	uint32_t tb_period;
+	uint32_t tb_credits_per_period;
+	uint32_t tb_size;
+	uint32_t tb_credits;
+
+	/* Traffic classes (TCs) */
+	uint64_t tc_time; /* time of next update */
+	uint32_t tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	uint32_t tc_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	uint32_t tc_period;
+
+	/* TC oversubscription */
+	uint32_t tc_ov_period;
+	uint64_t tc_ov_time;
+	uint32_t tc_ov_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	uint8_t tc_ov_period_id;
+	uint8_t tc_ov[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	uint32_t tc_ov_n[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	double tc_ov_rate[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+
+	/* Statistics */
+	struct rte_sched_subport_stats stats;
+};
+
+struct rte_sched_pipe_profile {
+	/* Token bucket (TB) */
+	uint32_t tb_period;
+	uint32_t tb_credits_per_period;
+	uint32_t tb_size;
+
+	/* Pipe traffic classes */
+	uint32_t tc_period;
+	uint32_t tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	uint8_t tc_ov_weight[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+
+	/* Pipe queues */
+	uint8_t wrr_cost[RTE_SCHED_QUEUES_PER_PIPE];
+};
+
+struct rte_sched_pipe {
+	/* Token bucket (TB) */
+	uint64_t tb_time; /* time of last update */
+	uint32_t tb_credits;
+
+	/* Pipe profile and flags */
+	uint32_t profile;
+
+	/* Traffic classes (TCs) */
+	uint64_t tc_time; /* time of next update */
+	uint32_t tc_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+
+	/* Weighted Round Robin (WRR) */
+	uint8_t wrr_tokens[RTE_SCHED_QUEUES_PER_PIPE];
+
+	/* TC oversubscription */
+#ifdef RTE_SCHED_SUBPORT_TC_OV
+	uint32_t tc_ov_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	uint8_t tc_ov_period_id;
+#else
+	uint64_t reserved;
+#endif
+} __rte_cache_aligned;
+
+struct rte_sched_queue {
+	uint16_t qw;
+	uint16_t qr;
+};
+
+struct rte_sched_queue_extra {
+	struct rte_sched_queue_stats stats;
+#ifdef RTE_SCHED_RED
+	struct rte_red red;
+#endif
+};
+
+enum grinder_state {
+	e_GRINDER_PREFETCH_PIPE = 0,
+	e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS,
+	e_GRINDER_PREFETCH_MBUF,
+	e_GRINDER_READ_MBUF
+};
+
+struct rte_sched_grinder {
+	/* Pipe cache */
+	uint16_t pcache_qmask[RTE_SCHED_GRINDER_PCACHE_SIZE];
+	uint32_t pcache_qindex[RTE_SCHED_GRINDER_PCACHE_SIZE];
+	uint32_t pcache_w;
+	uint32_t pcache_r;
+
+	/* Current pipe */
+	enum grinder_state state;
+	uint32_t productive;
+	uint32_t pindex;
+	struct rte_sched_subport *subport;
+	struct rte_sched_pipe *pipe;
+	struct rte_sched_pipe_profile *pipe_params;
+
+	/* TC cache */
+	uint8_t tccache_qmask[4];
+	uint32_t tccache_qindex[4];
+	uint32_t tccache_w;
+	uint32_t tccache_r;
+
+	/* Current TC */
+	uint32_t tc_index;
+	struct rte_sched_queue *queue[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	struct rte_mbuf **qbase[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	uint32_t qindex[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	uint16_t qsize;
+	uint32_t qmask;
+	uint32_t qpos;
+	struct rte_mbuf *pkt;
+
+	double ov_coef;
+
+	uint16_t wrr_tokens[RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS];
+	uint16_t wrr_mask[RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS];
+	uint8_t wrr_cost[RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS];
+};
+
+struct rte_sched_port {
+	/* User parameters */
+	uint32_t n_subports_per_port;
+	uint32_t n_pipes_per_subport;
+	uint32_t rate;
+	uint32_t frame_overhead;
+	uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	uint32_t n_pipe_profiles;
+#ifdef RTE_SCHED_RED
+	struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][e_RTE_METER_COLORS];
+#endif
+
+	/* Timing */
+	uint64_t time_cpu_cycles; /* Current CPU time measured in CPU cycles */
+	uint64_t time_cpu_bytes;  /* Current CPU time measured in bytes */
+	uint64_t time;            /* Current NIC TX time measured in bytes */
+	double cycles_per_byte;   /* CPU cycles per byte */
+
+	/* Scheduling loop detection */
+	uint32_t pipe_loop;
+	uint32_t pipe_exhaustion;
+
+	/* Bitmap */
+	struct rte_bitmap bmp;
+	uint32_t grinder_base_bmp_pos[RTE_SCHED_PORT_N_GRINDERS] __rte_aligned_16;
+
+	/* Grinders */
+	struct rte_sched_grinder grinder[RTE_SCHED_PORT_N_GRINDERS];
+	uint32_t busy_grinders;
+	struct rte_mbuf **pkts_out;
+	uint32_t n_pkts_out;
+
+	/* Queue base calculation */
+	uint32_t qsize_add[RTE_SCHED_QUEUES_PER_PIPE];
+	uint32_t qsize_sum;
+
+	/* Large data structures */
+	struct rte_sched_subport *subport;
+	struct rte_sched_pipe *pipe;
+	struct rte_sched_queue *queue;
+	struct rte_sched_queue_extra *queue_extra;
+	struct rte_sched_pipe_profile *pipe_profiles;
+	uint8_t *bmp_array;
+	struct rte_mbuf **queue_array;
+	uint8_t memory[0] __rte_cache_aligned;
+} __rte_cache_aligned;
+
+enum rte_sched_port_array {
+	e_RTE_SCHED_PORT_ARRAY_SUBPORT = 0,
+	e_RTE_SCHED_PORT_ARRAY_PIPE,
+	e_RTE_SCHED_PORT_ARRAY_QUEUE,
+	e_RTE_SCHED_PORT_ARRAY_QUEUE_EXTRA,
+	e_RTE_SCHED_PORT_ARRAY_PIPE_PROFILES,
+	e_RTE_SCHED_PORT_ARRAY_BMP_ARRAY,
+	e_RTE_SCHED_PORT_ARRAY_QUEUE_ARRAY,
+	e_RTE_SCHED_PORT_ARRAY_TOTAL,
+};
+
+#ifdef RTE_SCHED_COLLECT_STATS
+
+static inline uint32_t
+rte_sched_port_queues_per_subport(struct rte_sched_port *port)
+{
+	return RTE_SCHED_QUEUES_PER_PIPE * port->n_pipes_per_subport;
+}
+
+#endif
+
+static inline uint32_t
+rte_sched_port_queues_per_port(struct rte_sched_port *port)
+{
+	return RTE_SCHED_QUEUES_PER_PIPE * port->n_pipes_per_subport * port->n_subports_per_port;
+}
+
+static int
+rte_sched_port_check_params(struct rte_sched_port_params *params)
+{
+	uint32_t
i, j; + + if (params == NULL) { + return -1; + } + + /* name */ + if (params->name == NULL) { + return -2; + } + + /* socket */ + if ((params->socket < 0) || (params->socket >= RTE_MAX_NUMA_NODES)) { + return -3; + } + + /* rate */ + if (params->rate == 0) { + return -4; + } + + /* n_subports_per_port: non-zero, power of 2 */ + if ((params->n_subports_per_port == 0) || (!rte_is_power_of_2(params->n_subports_per_port))) { + return -5; + } + + /* n_pipes_per_subport: non-zero, power of 2 */ + if ((params->n_pipes_per_subport == 0) || (!rte_is_power_of_2(params->n_pipes_per_subport))) { + return -6; + } + + /* qsize: non-zero, power of 2, no bigger than 32K (due to 16-bit read/write pointers) */ + for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i ++) { + uint16_t qsize = params->qsize[i]; + + if ((qsize == 0) || (!rte_is_power_of_2(qsize))) { + return -7; + } + } + + /* pipe_profiles and n_pipe_profiles */ + if ((params->pipe_profiles == NULL) || + (params->n_pipe_profiles == 0) || + (params->n_pipe_profiles > RTE_SCHED_PIPE_PROFILES_PER_PORT)) { + return -8; + } + + for (i = 0; i < params->n_pipe_profiles; i ++) { + struct rte_sched_pipe_params *p = params->pipe_profiles + i; + + /* TB rate: non-zero, not greater than port rate */ + if ((p->tb_rate == 0) || (p->tb_rate > params->rate)) { + return -9; + } + + /* TB size: non-zero */ + if (p->tb_size == 0) { + return -10; + } + + /* TC rate: non-zero, less than pipe rate */ + for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j ++) { + if ((p->tc_rate[j] == 0) || (p->tc_rate[j] > p->tb_rate)) { + return -11; + } + } + + /* TC period: non-zero */ + if (p->tc_period == 0) { + return -12; + } + +#ifdef RTE_SCHED_SUBPORT_TC_OV + /* TC oversubscription weights: non-zero */ + for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j ++) { + if (p->tc_ov_weight[j] == 0) { + return -13; + } + } +#endif + + /* Queue WRR weights: non-zero */ + for (j = 0; j < RTE_SCHED_QUEUES_PER_PIPE; j ++) { + if (p->wrr_weights[j] == 0) { + return -14; + } + } + } + + return 0; +} + +static uint32_t +rte_sched_port_get_array_base(struct rte_sched_port_params *params, enum rte_sched_port_array array) +{ + uint32_t n_subports_per_port = params->n_subports_per_port; + uint32_t n_pipes_per_subport = params->n_pipes_per_subport; + uint32_t n_pipes_per_port = n_pipes_per_subport * n_subports_per_port; + uint32_t n_queues_per_port = RTE_SCHED_QUEUES_PER_PIPE * n_pipes_per_subport * n_subports_per_port; + + uint32_t size_subport = n_subports_per_port * sizeof(struct rte_sched_subport); + uint32_t size_pipe = n_pipes_per_port * sizeof(struct rte_sched_pipe); + uint32_t size_queue = n_queues_per_port * sizeof(struct rte_sched_queue); + uint32_t size_queue_extra = n_queues_per_port * sizeof(struct rte_sched_queue_extra); + uint32_t size_pipe_profiles = RTE_SCHED_PIPE_PROFILES_PER_PORT * sizeof(struct rte_sched_pipe_profile); + uint32_t size_bmp_array = n_queues_per_port / 8; + uint32_t size_per_pipe_queue_array, size_queue_array; + + uint32_t base, i; + + size_per_pipe_queue_array = 0; + for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i ++) { + size_per_pipe_queue_array += RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS * params->qsize[i] * sizeof(struct rte_mbuf *); + } + size_queue_array = n_pipes_per_port * size_per_pipe_queue_array; + + base = 0; + + if (array == e_RTE_SCHED_PORT_ARRAY_SUBPORT) return base; + base += CACHE_LINE_ROUNDUP(size_subport); + + if (array == e_RTE_SCHED_PORT_ARRAY_PIPE) return base; + base += CACHE_LINE_ROUNDUP(size_pipe); + + if (array == 
e_RTE_SCHED_PORT_ARRAY_QUEUE) return base; + base += CACHE_LINE_ROUNDUP(size_queue); + + if (array == e_RTE_SCHED_PORT_ARRAY_QUEUE_EXTRA) return base; + base += CACHE_LINE_ROUNDUP(size_queue_extra); + + if (array == e_RTE_SCHED_PORT_ARRAY_PIPE_PROFILES) return base; + base += CACHE_LINE_ROUNDUP(size_pipe_profiles); + + if (array == e_RTE_SCHED_PORT_ARRAY_BMP_ARRAY) return base; + base += CACHE_LINE_ROUNDUP(size_bmp_array); + + if (array == e_RTE_SCHED_PORT_ARRAY_QUEUE_ARRAY) return base; + base += CACHE_LINE_ROUNDUP(size_queue_array); + + return base; +} + +uint32_t +rte_sched_port_get_memory_footprint(struct rte_sched_port_params *params) +{ + uint32_t size0, size1; + int status; + + status = rte_sched_port_check_params(params); + if (status != 0) { + RTE_LOG(INFO, SCHED, "Port scheduler params check failed (%d)\n", status); + + return 0; + } + + size0 = sizeof(struct rte_sched_port); + size1 = rte_sched_port_get_array_base(params, e_RTE_SCHED_PORT_ARRAY_TOTAL); + + return (size0 + size1); +} + +static void +rte_sched_port_config_qsize(struct rte_sched_port *port) +{ + /* TC 0 */ + port->qsize_add[0] = 0; + port->qsize_add[1] = port->qsize_add[0] + port->qsize[0]; + port->qsize_add[2] = port->qsize_add[1] + port->qsize[0]; + port->qsize_add[3] = port->qsize_add[2] + port->qsize[0]; + + /* TC 1 */ + port->qsize_add[4] = port->qsize_add[3] + port->qsize[0]; + port->qsize_add[5] = port->qsize_add[4] + port->qsize[1]; + port->qsize_add[6] = port->qsize_add[5] + port->qsize[1]; + port->qsize_add[7] = port->qsize_add[6] + port->qsize[1]; + + /* TC 2 */ + port->qsize_add[8] = port->qsize_add[7] + port->qsize[1]; + port->qsize_add[9] = port->qsize_add[8] + port->qsize[2]; + port->qsize_add[10] = port->qsize_add[9] + port->qsize[2]; + port->qsize_add[11] = port->qsize_add[10] + port->qsize[2]; + + /* TC 3 */ + port->qsize_add[12] = port->qsize_add[11] + port->qsize[2]; + port->qsize_add[13] = port->qsize_add[12] + port->qsize[3]; + port->qsize_add[14] = port->qsize_add[13] + port->qsize[3]; + port->qsize_add[15] = port->qsize_add[14] + port->qsize[3]; + + port->qsize_sum = port->qsize_add[15] + port->qsize[3]; +} + +static void +rte_sched_port_log_pipe_profile(struct rte_sched_port *port, uint32_t i) +{ + struct rte_sched_pipe_profile *p = port->pipe_profiles + i; + + RTE_LOG(INFO, SCHED, "Low level config for pipe profile %u:\n" + "\tToken bucket: period = %u, credits per period = %u, size = %u\n" + "\tTraffic classes: period = %u, credits per period = [%u, %u, %u, %u], ov weights = [%hhu, %hhu, %hhu, %hhu]\n" + "\tWRR cost: [%hhu, %hhu, %hhu, %hhu], [%hhu, %hhu, %hhu, %hhu], [%hhu, %hhu, %hhu, %hhu], [%hhu, %hhu, %hhu, %hhu]\n", + i, + + /* Token bucket */ + p->tb_period, + p->tb_credits_per_period, + p->tb_size, + + /* Traffic classes */ + p->tc_period, + p->tc_credits_per_period[0], + p->tc_credits_per_period[1], + p->tc_credits_per_period[2], + p->tc_credits_per_period[3], + p->tc_ov_weight[0], + p->tc_ov_weight[1], + p->tc_ov_weight[2], + p->tc_ov_weight[3], + + /* WRR */ + p->wrr_cost[ 0], p->wrr_cost[ 1], p->wrr_cost[ 2], p->wrr_cost[ 3], + p->wrr_cost[ 4], p->wrr_cost[ 5], p->wrr_cost[ 6], p->wrr_cost[ 7], + p->wrr_cost[ 8], p->wrr_cost[ 9], p->wrr_cost[10], p->wrr_cost[11], + p->wrr_cost[12], p->wrr_cost[13], p->wrr_cost[14], p->wrr_cost[15]); +} + +static inline uint64_t +rte_sched_time_ms_to_bytes(uint32_t time_ms, uint32_t rate) +{ + uint64_t time = time_ms; + time = (time * rate) / 1000; + + return time; +} + +static void +rte_sched_port_config_pipe_profile_table(struct 
rte_sched_port *port, struct rte_sched_port_params *params) +{ + uint32_t i, j; + + for (i = 0; i < port->n_pipe_profiles; i ++) { + struct rte_sched_pipe_params *src = params->pipe_profiles + i; + struct rte_sched_pipe_profile *dst = port->pipe_profiles + i; + + /* Token Bucket */ + if (src->tb_rate == params->rate) { + dst->tb_credits_per_period = 1; + dst->tb_period = 1; + } else { + double tb_rate = ((double) src->tb_rate) / ((double) params->rate); + double d = RTE_SCHED_TB_RATE_CONFIG_ERR; + + rte_approx(tb_rate, d, &dst->tb_credits_per_period, &dst->tb_period); + } + dst->tb_size = src->tb_size; + + /* Traffic Classes */ + dst->tc_period = (uint32_t) rte_sched_time_ms_to_bytes(src->tc_period, params->rate); + for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j ++) { + dst->tc_credits_per_period[j] = (uint32_t) rte_sched_time_ms_to_bytes(src->tc_period, src->tc_rate[j]); + } +#ifdef RTE_SCHED_SUBPORT_TC_OV + for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j ++) { + dst->tc_ov_weight[j] = src->tc_ov_weight[j]; + } +#endif + + /* WRR */ + for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j ++) { + uint32_t wrr_cost[RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS]; + uint32_t lcd, lcd1, lcd2; + uint32_t qindex; + + qindex = j * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; + + wrr_cost[0] = src->wrr_weights[qindex]; + wrr_cost[1] = src->wrr_weights[qindex + 1]; + wrr_cost[2] = src->wrr_weights[qindex + 2]; + wrr_cost[3] = src->wrr_weights[qindex + 3]; + + lcd1 = rte_get_lcd(wrr_cost[0], wrr_cost[1]); + lcd2 = rte_get_lcd(wrr_cost[2], wrr_cost[3]); + lcd = rte_get_lcd(lcd1, lcd2); + + wrr_cost[0] = lcd / wrr_cost[0]; + wrr_cost[1] = lcd / wrr_cost[1]; + wrr_cost[2] = lcd / wrr_cost[2]; + wrr_cost[3] = lcd / wrr_cost[3]; + + dst->wrr_cost[qindex] = (uint8_t) wrr_cost[0]; + dst->wrr_cost[qindex + 1] = (uint8_t) wrr_cost[1]; + dst->wrr_cost[qindex + 2] = (uint8_t) wrr_cost[2]; + dst->wrr_cost[qindex + 3] = (uint8_t) wrr_cost[3]; + } + + rte_sched_port_log_pipe_profile(port, i); + } +} + +struct rte_sched_port * +rte_sched_port_config(struct rte_sched_port_params *params) +{ + struct rte_sched_port *port = NULL; + const struct rte_memzone *mz = NULL; + uint32_t mem_size, i; + + /* Check user parameters. 
Determine the amount of memory to allocate */ + mem_size = rte_sched_port_get_memory_footprint(params); + if (mem_size == 0) { + return NULL; + } + + /* Allocate memory to store the data structures */ + mz = rte_memzone_lookup(params->name); + if (mz) { + /* Use existing memzone, provided that its size is big enough */ + if (mz->len < mem_size) { + return NULL; + } + } else { + /* Create new memzone */ + mz = rte_memzone_reserve(params->name, mem_size, params->socket, 0); + if (mz == NULL) { + return NULL; + } + } + memset(mz->addr, 0, mem_size); + port = (struct rte_sched_port *) mz->addr; + + /* User parameters */ + port->n_subports_per_port = params->n_subports_per_port; + port->n_pipes_per_subport = params->n_pipes_per_subport; + port->rate = params->rate; + port->frame_overhead = params->frame_overhead; + memcpy(port->qsize, params->qsize, sizeof(params->qsize)); + port->n_pipe_profiles = params->n_pipe_profiles; + +#ifdef RTE_SCHED_RED + for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) { + uint32_t j; + + for (j = 0; j < e_RTE_METER_COLORS; j++) { + if (rte_red_config_init(&port->red_config[i][j], + params->red_params[i][j].wq_log2, + params->red_params[i][j].min_th, + params->red_params[i][j].max_th, + params->red_params[i][j].maxp_inv) != 0) { + return NULL; + } + } + } +#endif + + /* Timing */ + port->time_cpu_cycles = rte_get_tsc_cycles(); + port->time_cpu_bytes = 0; + port->time = 0; + port->cycles_per_byte = ((double) rte_get_tsc_hz()) / ((double) params->rate); + + /* Scheduling loop detection */ + port->pipe_loop = RTE_SCHED_PIPE_INVALID; + port->pipe_exhaustion = 0; + + /* Grinders */ + port->busy_grinders = 0; + port->pkts_out = NULL; + port->n_pkts_out = 0; + + /* Queue base calculation */ + rte_sched_port_config_qsize(port); + + /* Large data structures */ + port->subport = (struct rte_sched_subport *) (port->memory + rte_sched_port_get_array_base(params, e_RTE_SCHED_PORT_ARRAY_SUBPORT)); + port->pipe = (struct rte_sched_pipe *) (port->memory + rte_sched_port_get_array_base(params, e_RTE_SCHED_PORT_ARRAY_PIPE)); + port->queue = (struct rte_sched_queue *) (port->memory + rte_sched_port_get_array_base(params, e_RTE_SCHED_PORT_ARRAY_QUEUE)); + port->queue_extra = (struct rte_sched_queue_extra *) (port->memory + rte_sched_port_get_array_base(params, e_RTE_SCHED_PORT_ARRAY_QUEUE_EXTRA)); + port->pipe_profiles = (struct rte_sched_pipe_profile *) (port->memory + rte_sched_port_get_array_base(params, e_RTE_SCHED_PORT_ARRAY_PIPE_PROFILES)); + port->bmp_array = port->memory + rte_sched_port_get_array_base(params, e_RTE_SCHED_PORT_ARRAY_BMP_ARRAY); + port->queue_array = (struct rte_mbuf **) (port->memory + rte_sched_port_get_array_base(params, e_RTE_SCHED_PORT_ARRAY_QUEUE_ARRAY)); + + /* Pipe profile table */ + rte_sched_port_config_pipe_profile_table(port, params); + + /* Bitmap */ + if (rte_bitmap_init(&port->bmp, port->bmp_array, rte_sched_port_queues_per_port(port)) != 0) { + RTE_LOG(INFO, SCHED, "Bitmap init error\n"); + return NULL; + } + for (i = 0; i < RTE_SCHED_PORT_N_GRINDERS; i ++) { + port->grinder_base_bmp_pos[i] = RTE_SCHED_PIPE_INVALID; + } + + return port; +} + +void +rte_sched_port_free(struct rte_sched_port *port) +{ + /* Check user parameters */ + if (port == NULL){ + return; + } + rte_bitmap_free(&port->bmp); + + return; +} + +static void +rte_sched_port_log_subport_config(struct rte_sched_port *port, uint32_t i) +{ + struct rte_sched_subport *s = port->subport + i; + + RTE_LOG(INFO, SCHED, "Low level config for subport %u:\n" + "\tToken bucket: period = 
%u, credits per period = %u, size = %u\n" + "\tTraffic classes: period = %u, credits per period = [%u, %u, %u, %u], ov period = %u\n", + i, + + /* Token bucket */ + s->tb_period, + s->tb_credits_per_period, + s->tb_size, + + /* Traffic classes */ + s->tc_period, + s->tc_credits_per_period[0], + s->tc_credits_per_period[1], + s->tc_credits_per_period[2], + s->tc_credits_per_period[3], + s->tc_ov_period); +} + +int +rte_sched_subport_config(struct rte_sched_port *port, + uint32_t subport_id, + struct rte_sched_subport_params *params) +{ + struct rte_sched_subport *s; + uint32_t i; + + /* Check user parameters */ + if ((port == NULL) || + (subport_id >= port->n_subports_per_port) || + (params == NULL)) { + return -1; + } + + if ((params->tb_rate == 0) || (params->tb_rate > port->rate)) { + return -2; + } + + if (params->tb_size == 0) { + return -3; + } + + for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i ++) { + if ((params->tc_rate[i] == 0) || (params->tc_rate[i] > params->tb_rate)) { + return -4; + } + } + + if (params->tc_period == 0) { + return -5; + } + +#ifdef RTE_SCHED_SUBPORT_TC_OV + if ((params->tc_ov_period == 0) || (params->tc_ov_period > params->tc_period)) { + return -6; + } +#endif + + s = port->subport + subport_id; + + /* Token Bucket (TB) */ + if (params->tb_rate == port->rate) { + s->tb_credits_per_period = 1; + s->tb_period = 1; + } else { + double tb_rate = ((double) params->tb_rate) / ((double) port->rate); + double d = RTE_SCHED_TB_RATE_CONFIG_ERR; + + rte_approx(tb_rate, d, &s->tb_credits_per_period, &s->tb_period); + } + s->tb_size = params->tb_size; + s->tb_time = port->time; + s->tb_credits = s->tb_size / 2; + + /* Traffic Classes (TCs) */ + s->tc_period = (uint32_t) rte_sched_time_ms_to_bytes(params->tc_period, port->rate); + for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i ++) { + s->tc_credits_per_period[i] = (uint32_t) rte_sched_time_ms_to_bytes(params->tc_period, params->tc_rate[i]); + } + s->tc_time = port->time + s->tc_period; + for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i ++) { + s->tc_credits[i] = s->tc_credits_per_period[i]; + } + +#ifdef RTE_SCHED_SUBPORT_TC_OV + /* TC oversubscription */ + s->tc_ov_period = (uint32_t) rte_sched_time_ms_to_bytes(params->tc_ov_period, port->rate); + s->tc_ov_time = port->time + s->tc_ov_period; + s->tc_ov_period_id = 0; + for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i ++) { + s->tc_ov[i] = 0; + s->tc_ov_n[i] = 0; + s->tc_ov_rate[i] = 0; + s->tc_ov_credits[i] = 0; + } +#endif + + rte_sched_port_log_subport_config(port, subport_id); + + return 0; +} + +int +rte_sched_pipe_config(struct rte_sched_port *port, + uint32_t subport_id, + uint32_t pipe_id, + int32_t pipe_profile) +{ + struct rte_sched_subport *s; + struct rte_sched_pipe *p; + struct rte_sched_pipe_profile *params; + uint32_t deactivate, profile, i; + + /* Check user parameters */ + profile = (uint32_t) pipe_profile; + deactivate = (pipe_profile < 0); + if ((port == NULL) || + (subport_id >= port->n_subports_per_port) || + (pipe_id >= port->n_pipes_per_subport) || + ((!deactivate) && (profile >= port->n_pipe_profiles))) { + return -1; + } + + /* Check that subport configuration is valid */ + s = port->subport + subport_id; + if (s->tb_period == 0) { + return -2; + } + + p = port->pipe + (subport_id * port->n_pipes_per_subport + pipe_id); + + /* Handle the case when pipe already has a valid configuration */ + if (p->tb_time) { + params = port->pipe_profiles + p->profile; + +#ifdef RTE_SCHED_SUBPORT_TC_OV + /* Unplug pipe from its subport */ + 
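/* (Clarifying note: tc_ov_n counts the pipes, weighted by
+		 * tc_ov_weight, that may oversubscribe each subport TC, and tc_ov_rate
+		 * tracks their aggregate demanded rate; both are reduced here so the
+		 * subport tc_ov[] flags can be recomputed without this pipe.) */
+		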
for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i ++) {
+			s->tc_ov_n[i] -= params->tc_ov_weight[i];
+			s->tc_ov_rate[i] -= ((double) params->tc_credits_per_period[i]) / ((double) params->tc_period);
+			s->tc_ov[i] = s->tc_ov_rate[i] > (((double) s->tc_credits_per_period[i]) / ((double) s->tc_period));
+		}
+#endif
+
+		/* Reset the pipe */
+		memset(p, 0, sizeof(struct rte_sched_pipe));
+	}
+
+	if (deactivate) {
+		return 0;
+	}
+
+	/* Apply the new pipe configuration */
+	p->profile = profile;
+	params = port->pipe_profiles + p->profile;
+
+	/* Token Bucket (TB) */
+	p->tb_time = port->time;
+	p->tb_credits = params->tb_size / 2;
+
+	/* Traffic Classes (TCs) */
+	p->tc_time = port->time + params->tc_period;
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i ++) {
+		p->tc_credits[i] = params->tc_credits_per_period[i];
+	}
+
+#ifdef RTE_SCHED_SUBPORT_TC_OV
+	/* Subport TC oversubscription */
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i ++) {
+		s->tc_ov_n[i] += params->tc_ov_weight[i];
+		s->tc_ov_rate[i] += ((double) params->tc_credits_per_period[i]) / ((double) params->tc_period);
+		s->tc_ov[i] = s->tc_ov_rate[i] > (((double) s->tc_credits_per_period[i]) / ((double) s->tc_period));
+	}
+	p->tc_ov_period_id = s->tc_ov_period_id;
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i ++) {
+		p->tc_ov_credits[i] = 0;
+	}
+#endif
+
+	return 0;
+}
+
+int
+rte_sched_subport_read_stats(struct rte_sched_port *port,
+	uint32_t subport_id,
+	struct rte_sched_subport_stats *stats,
+	uint32_t *tc_ov)
+{
+	struct rte_sched_subport *s;
+	uint32_t mask, i;
+
+	/* Check user parameters */
+	if ((port == NULL) ||
+	    (subport_id >= port->n_subports_per_port) ||
+	    (stats == NULL) ||
+	    (tc_ov == NULL)) {
+		return -1;
+	}
+	s = port->subport + subport_id;
+
+	/* Copy subport stats and clear */
+	memcpy(stats, &s->stats, sizeof(struct rte_sched_subport_stats));
+	memset(&s->stats, 0, sizeof(struct rte_sched_subport_stats));
+
+	/* Subport TC oversubscription status */
+	mask = 0;
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i ++) {
+		mask |= ((uint32_t) s->tc_ov[i]) << i;
+	}
+	*tc_ov = mask;
+
+	return 0;
+}
+
+int
+rte_sched_queue_read_stats(struct rte_sched_port *port,
+	uint32_t queue_id,
+	struct rte_sched_queue_stats *stats,
+	uint16_t *qlen)
+{
+	struct rte_sched_queue *q;
+	struct rte_sched_queue_extra *qe;
+
+	/* Check user parameters */
+	if ((port == NULL) ||
+	    (queue_id >= rte_sched_port_queues_per_port(port)) ||
+	    (stats == NULL) ||
+	    (qlen == NULL)) {
+		return -1;
+	}
+	q = port->queue + queue_id;
+	qe = port->queue_extra + queue_id;
+
+	/* Copy queue stats and clear */
+	memcpy(stats, &qe->stats, sizeof(struct rte_sched_queue_stats));
+	memset(&qe->stats, 0, sizeof(struct rte_sched_queue_stats));
+
+	/* Queue length */
+	*qlen = q->qw - q->qr;
+
+	return 0;
+}
+
+static inline uint32_t
+rte_sched_port_qindex(struct rte_sched_port *port, uint32_t subport, uint32_t pipe, uint32_t traffic_class, uint32_t queue)
+{
+	uint32_t result;
+
+	result = subport * port->n_pipes_per_subport + pipe;
+	result = result * RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE + traffic_class;
+	result = result * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + queue;
+
+	return result;
+}
+
+static inline struct rte_mbuf **
+rte_sched_port_qbase(struct rte_sched_port *port, uint32_t qindex)
+{
+	uint32_t pindex = qindex >> 4;
+	uint32_t qpos = qindex & 0xF;
+
+	return (port->queue_array + pindex * port->qsize_sum + port->qsize_add[qpos]);
+}
+
+static inline uint16_t
+rte_sched_port_qsize(struct rte_sched_port *port, uint32_t qindex)
+{
+	uint32_t tc = (qindex >> 2) & 0x3;
+
+	return port->qsize[tc];
+}
+
+#if RTE_SCHED_DEBUG
+
+static inline int
+rte_sched_port_queue_is_empty(struct rte_sched_port *port, uint32_t qindex)
+{
+	struct rte_sched_queue *queue = port->queue + qindex;
+
+	return (queue->qr == queue->qw);
+}
+
+static inline int
+rte_sched_port_queue_is_full(struct rte_sched_port *port, uint32_t qindex)
+{
+	struct rte_sched_queue *queue = port->queue + qindex;
+	uint16_t qsize = rte_sched_port_qsize(port, qindex);
+	uint16_t qlen = queue->qw - queue->qr;
+
+	return (qlen >= qsize);
+}
+
+#endif /* RTE_SCHED_DEBUG */
+
+#ifdef RTE_SCHED_COLLECT_STATS
+
+static inline void
+rte_sched_port_update_subport_stats(struct rte_sched_port *port, uint32_t qindex, struct rte_mbuf *pkt)
+{
+	struct rte_sched_subport *s = port->subport + (qindex / rte_sched_port_queues_per_subport(port));
+	uint32_t tc_index = (qindex >> 2) & 0x3;
+	uint32_t pkt_len = pkt->pkt.pkt_len;
+
+	s->stats.n_pkts_tc[tc_index] += 1;
+	s->stats.n_bytes_tc[tc_index] += pkt_len;
+}
+
+static inline void
+rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port, uint32_t qindex, struct rte_mbuf *pkt)
+{
+	struct rte_sched_subport *s = port->subport + (qindex / rte_sched_port_queues_per_subport(port));
+	uint32_t tc_index = (qindex >> 2) & 0x3;
+	uint32_t pkt_len = pkt->pkt.pkt_len;
+
+	s->stats.n_pkts_tc_dropped[tc_index] += 1;
+	s->stats.n_bytes_tc_dropped[tc_index] += pkt_len;
+}
+
+static inline void
+rte_sched_port_update_queue_stats(struct rte_sched_port *port, uint32_t qindex, struct rte_mbuf *pkt)
+{
+	struct rte_sched_queue_extra *qe = port->queue_extra + qindex;
+	uint32_t pkt_len = pkt->pkt.pkt_len;
+
+	qe->stats.n_pkts += 1;
+	qe->stats.n_bytes += pkt_len;
+}
+
+static inline void
+rte_sched_port_update_queue_stats_on_drop(struct rte_sched_port *port, uint32_t qindex, struct rte_mbuf *pkt)
+{
+	struct rte_sched_queue_extra *qe = port->queue_extra + qindex;
+	uint32_t pkt_len = pkt->pkt.pkt_len;
+
+	qe->stats.n_pkts_dropped += 1;
+	qe->stats.n_bytes_dropped += pkt_len;
+}
+
+#endif /* RTE_SCHED_COLLECT_STATS */
+
+#ifdef RTE_SCHED_RED
+
+static inline int
+rte_sched_port_red_drop(struct rte_sched_port *port, struct rte_mbuf *pkt, uint32_t qindex, uint16_t qlen)
+{
+	struct rte_sched_queue_extra *qe;
+	struct rte_red_config *red_cfg;
+	struct rte_red *red;
+	uint32_t tc_index;
+	enum rte_meter_color color;
+
+	tc_index = (qindex >> 2) & 0x3;
+	color = rte_sched_port_pkt_read_color(pkt);
+	red_cfg = &port->red_config[tc_index][color];
+
+	qe = port->queue_extra + qindex;
+	red = &qe->red;
+
+	return rte_red_enqueue(red_cfg, red, qlen, port->time);
+}
+
+static inline void
+rte_sched_port_set_queue_empty_timestamp(struct rte_sched_port *port, uint32_t qindex)
+{
+	struct rte_sched_queue_extra *qe;
+	struct rte_red *red;
+
+	qe = port->queue_extra + qindex;
+	red = &qe->red;
+
+	rte_red_mark_queue_empty(red, port->time);
+}
+
+#else
+
+#define rte_sched_port_red_drop(port, pkt, qindex, qlen) 0
+
+#define rte_sched_port_set_queue_empty_timestamp(port, qindex)
+
+#endif /* RTE_SCHED_RED */
+
+#if RTE_SCHED_DEBUG
+
+static inline int
+debug_pipe_is_empty(struct rte_sched_port *port, uint32_t pindex)
+{
+	uint32_t qindex, i;
+
+	qindex = pindex << 4;
+
+	for (i = 0; i < 16; i ++){
+		uint32_t queue_empty = rte_sched_port_queue_is_empty(port, qindex + i);
+		uint32_t bmp_bit_clear = (rte_bitmap_get(&port->bmp, qindex + i) == 0);
+
+		if (queue_empty != bmp_bit_clear){
+			rte_panic("Queue status mismatch for queue %u of pipe %u\n", i, pindex);
%u\n", i, pindex); + } + + if (!queue_empty){ + return 0; + } + } + + return 1; +} + +static inline void +debug_check_queue_slab(struct rte_sched_port *port, uint32_t bmp_pos, uint64_t bmp_slab) +{ + uint64_t mask; + uint32_t i, panic; + + if (bmp_slab == 0){ + rte_panic("Empty slab at position %u\n", bmp_pos); + } + + panic = 0; + for (i = 0, mask = 1; i < 64; i ++, mask <<= 1) { + if (mask & bmp_slab){ + if (rte_sched_port_queue_is_empty(port, bmp_pos + i)) { + printf("Queue %u (slab offset %u) is empty\n", bmp_pos + i, i); + panic = 1; + } + } + } + + if (panic){ + rte_panic("Empty queues in slab 0x%" PRIx64 "starting at position %u\n", + bmp_slab, bmp_pos); + } +} + +#endif /* RTE_SCHED_DEBUG */ + +static inline uint32_t +rte_sched_port_enqueue_qptrs_prefetch0(struct rte_sched_port *port, struct rte_mbuf *pkt) +{ + struct rte_sched_queue *q; +#ifdef RTE_SCHED_COLLECT_STATS + struct rte_sched_queue_extra *qe; +#endif + uint32_t subport, pipe, traffic_class, queue, qindex; + + rte_sched_port_pkt_read_tree_path(pkt, &subport, &pipe, &traffic_class, &queue); + + qindex = rte_sched_port_qindex(port, subport, pipe, traffic_class, queue); + q = port->queue + qindex; + rte_prefetch0(q); +#ifdef RTE_SCHED_COLLECT_STATS + qe = port->queue_extra + qindex; + rte_prefetch0(qe); +#endif + + return qindex; +} + +static inline void +rte_sched_port_enqueue_qwa_prefetch0(struct rte_sched_port *port, uint32_t qindex, struct rte_mbuf **qbase) +{ + struct rte_sched_queue *q; + struct rte_mbuf **q_qw; + uint16_t qsize; + + q = port->queue + qindex; + qsize = rte_sched_port_qsize(port, qindex); + q_qw = qbase + (q->qw & (qsize - 1)); + + rte_prefetch0(q_qw); + rte_bitmap_prefetch0(&port->bmp, qindex); +} + +static inline int +rte_sched_port_enqueue_qwa(struct rte_sched_port *port, uint32_t qindex, struct rte_mbuf **qbase, struct rte_mbuf *pkt) +{ + struct rte_sched_queue *q; + uint16_t qsize; + uint16_t qlen; + + q = port->queue + qindex; + qsize = rte_sched_port_qsize(port, qindex); + qlen = q->qw - q->qr; + + /* Drop the packet (and update drop stats) when queue is full */ + if (unlikely(rte_sched_port_red_drop(port, pkt, qindex, qlen) || (qlen >= qsize))) { + rte_pktmbuf_free(pkt); +#ifdef RTE_SCHED_COLLECT_STATS + rte_sched_port_update_subport_stats_on_drop(port, qindex, pkt); + rte_sched_port_update_queue_stats_on_drop(port, qindex, pkt); +#endif + return 0; + } + + /* Enqueue packet */ + qbase[q->qw & (qsize - 1)] = pkt; + q->qw ++; + + /* Activate queue in the port bitmap */ + rte_bitmap_set(&port->bmp, qindex); + + /* Statistics */ +#ifdef RTE_SCHED_COLLECT_STATS + rte_sched_port_update_subport_stats(port, qindex, pkt); + rte_sched_port_update_queue_stats(port, qindex, pkt); +#endif + + return 1; +} + +#if RTE_SCHED_ENQUEUE == 0 + +int +rte_sched_port_enqueue(struct rte_sched_port *port, struct rte_mbuf **pkts, uint32_t n_pkts) +{ + uint32_t result, i; + + result = 0; + + for (i = 0; i < n_pkts; i ++) { + struct rte_mbuf *pkt; + struct rte_mbuf **q_base; + uint32_t subport, pipe, traffic_class, queue, qindex; + + pkt = pkts[i]; + + rte_sched_port_pkt_read_tree_path(pkt, &subport, &pipe, &traffic_class, &queue); + + qindex = rte_sched_port_qindex(port, subport, pipe, traffic_class, queue); + + q_base = rte_sched_port_qbase(port, qindex); + + result += rte_sched_port_enqueue_qwa(port, qindex, q_base, pkt); + } + + return result; +} + +#else + +/* The enqueue function implements a 4-level pipeline with each stage processing + * two different packets. 
The purpose of using a pipeline is to hide the latency
+ * of prefetching the data structures. The naming convention is presented in the
+ * diagram below:
+ *
+ *   p00  _______   p10  _______   p20  _______   p30  _______
+ * ----->|       |----->|       |----->|       |----->|       |----->
+ *       |   0   |      |   1   |      |   2   |      |   3   |
+ * ----->|_______|----->|_______|----->|_______|----->|_______|----->
+ *   p01            p11            p21            p31
+ *
+ ***/
+int
+rte_sched_port_enqueue(struct rte_sched_port *port, struct rte_mbuf **pkts, uint32_t n_pkts)
+{
+	struct rte_mbuf *pkt00, *pkt01, *pkt10, *pkt11, *pkt20, *pkt21, *pkt30, *pkt31, *pkt_last;
+	struct rte_mbuf **q00_base, **q01_base, **q10_base, **q11_base, **q20_base, **q21_base, **q30_base, **q31_base, **q_last_base;
+	uint32_t q00, q01, q10, q11, q20, q21, q30, q31, q_last;
+	uint32_t r00, r01, r10, r11, r20, r21, r30, r31, r_last;
+	uint32_t result, i;
+
+	result = 0;
+
+	/* Less than 6 input packets available, which is not enough to feed the pipeline */
+	if (unlikely(n_pkts < 6)) {
+		struct rte_mbuf **q_base[5];
+		uint32_t q[5];
+
+		/* Prefetch the mbuf structure of each packet */
+		for (i = 0; i < n_pkts; i ++) {
+			rte_prefetch0(pkts[i]);
+		}
+
+		/* Prefetch the queue structure for each queue */
+		for (i = 0; i < n_pkts; i ++) {
+			q[i] = rte_sched_port_enqueue_qptrs_prefetch0(port, pkts[i]);
+		}
+
+		/* Prefetch the write pointer location of each queue */
+		for (i = 0; i < n_pkts; i ++) {
+			q_base[i] = rte_sched_port_qbase(port, q[i]);
+			rte_sched_port_enqueue_qwa_prefetch0(port, q[i], q_base[i]);
+		}
+
+		/* Write each packet to its queue */
+		for (i = 0; i < n_pkts; i ++) {
+			result += rte_sched_port_enqueue_qwa(port, q[i], q_base[i], pkts[i]);
+		}
+
+		return result;
+	}
+
+	/* Feed the first 3 stages of the pipeline (6 packets needed) */
+	pkt20 = pkts[0];
+	pkt21 = pkts[1];
+	rte_prefetch0(pkt20);
+	rte_prefetch0(pkt21);
+
+	pkt10 = pkts[2];
+	pkt11 = pkts[3];
+	rte_prefetch0(pkt10);
+	rte_prefetch0(pkt11);
+
+	q20 = rte_sched_port_enqueue_qptrs_prefetch0(port, pkt20);
+	q21 = rte_sched_port_enqueue_qptrs_prefetch0(port, pkt21);
+
+	pkt00 = pkts[4];
+	pkt01 = pkts[5];
+	rte_prefetch0(pkt00);
+	rte_prefetch0(pkt01);
+
+	q10 = rte_sched_port_enqueue_qptrs_prefetch0(port, pkt10);
+	q11 = rte_sched_port_enqueue_qptrs_prefetch0(port, pkt11);
+
+	q20_base = rte_sched_port_qbase(port, q20);
+	q21_base = rte_sched_port_qbase(port, q21);
+	rte_sched_port_enqueue_qwa_prefetch0(port, q20, q20_base);
+	rte_sched_port_enqueue_qwa_prefetch0(port, q21, q21_base);
+
+	/* Run the pipeline */
+	for (i = 6; i < (n_pkts & (~1)); i += 2) {
+		/* Propagate stage inputs */
+		pkt30 = pkt20;
+		pkt31 = pkt21;
+		pkt20 = pkt10;
+		pkt21 = pkt11;
+		pkt10 = pkt00;
+		pkt11 = pkt01;
+		q30 = q20;
+		q31 = q21;
+		q20 = q10;
+		q21 = q11;
+		q30_base = q20_base;
+		q31_base = q21_base;
+
+		/* Stage 0: Get packets in */
+		pkt00 = pkts[i];
+		pkt01 = pkts[i + 1];
+		rte_prefetch0(pkt00);
+		rte_prefetch0(pkt01);
+
+		/* Stage 1: Prefetch queue structure storing queue pointers */
+		q10 = rte_sched_port_enqueue_qptrs_prefetch0(port, pkt10);
+		q11 = rte_sched_port_enqueue_qptrs_prefetch0(port, pkt11);
+
+		/* Stage 2: Prefetch queue write location */
+		q20_base = rte_sched_port_qbase(port, q20);
+		q21_base = rte_sched_port_qbase(port, q21);
+		rte_sched_port_enqueue_qwa_prefetch0(port, q20, q20_base);
+		rte_sched_port_enqueue_qwa_prefetch0(port, q21, q21_base);
+
+		/* Stage 3: Write packet to queue and activate queue */
+		r30 = rte_sched_port_enqueue_qwa(port, q30, q30_base, pkt30);
+		r31 = rte_sched_port_enqueue_qwa(port, q31, q31_base, pkt31);
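+		/* r30/r31 are 1 if the packet was enqueued and 0 if it was dropped
+		 * (queue full or RED drop), so result counts the accepted packets */
+		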
result += r30 + r31; + } + + /* Drain the pipeline (exactly 6 packets). Handle the last packet in the case + of an odd number of input packets. */ + pkt_last = pkts[n_pkts - 1]; + rte_prefetch0(pkt_last); + + q00 = rte_sched_port_enqueue_qptrs_prefetch0(port, pkt00); + q01 = rte_sched_port_enqueue_qptrs_prefetch0(port, pkt01); + + q10_base = rte_sched_port_qbase(port, q10); + q11_base = rte_sched_port_qbase(port, q11); + rte_sched_port_enqueue_qwa_prefetch0(port, q10, q10_base); + rte_sched_port_enqueue_qwa_prefetch0(port, q11, q11_base); + + r20 = rte_sched_port_enqueue_qwa(port, q20, q20_base, pkt20); + r21 = rte_sched_port_enqueue_qwa(port, q21, q21_base, pkt21); + result += r20 + r21; + + q_last = rte_sched_port_enqueue_qptrs_prefetch0(port, pkt_last); + + q00_base = rte_sched_port_qbase(port, q00); + q01_base = rte_sched_port_qbase(port, q01); + rte_sched_port_enqueue_qwa_prefetch0(port, q00, q00_base); + rte_sched_port_enqueue_qwa_prefetch0(port, q01, q01_base); + + r10 = rte_sched_port_enqueue_qwa(port, q10, q10_base, pkt10); + r11 = rte_sched_port_enqueue_qwa(port, q11, q11_base, pkt11); + result += r10 + r11; + + q_last_base = rte_sched_port_qbase(port, q_last); + rte_sched_port_enqueue_qwa_prefetch0(port, q_last, q_last_base); + + r00 = rte_sched_port_enqueue_qwa(port, q00, q00_base, pkt00); + r01 = rte_sched_port_enqueue_qwa(port, q01, q01_base, pkt01); + result += r00 + r01; + + if (n_pkts & 1) { + r_last = rte_sched_port_enqueue_qwa(port, q_last, q_last_base, pkt_last); + result += r_last; + } + + return result; +} + +#endif /* RTE_SCHED_ENQUEUE */ + +#if RTE_SCHED_TS_CREDITS_UPDATE == 0 + +#define grinder_credits_update(port, pos) + +#elif !defined(RTE_SCHED_SUBPORT_TC_OV) + +static inline void +grinder_credits_update(struct rte_sched_port *port, uint32_t pos) +{ + struct rte_sched_grinder *grinder = port->grinder + pos; + struct rte_sched_subport *subport = grinder->subport; + struct rte_sched_pipe *pipe = grinder->pipe; + struct rte_sched_pipe_profile *params = grinder->pipe_params; + uint64_t n_periods; + + /* Subport TB */ + n_periods = (port->time - subport->tb_time) / subport->tb_period; + subport->tb_credits += n_periods * subport->tb_credits_per_period; + subport->tb_credits = rte_sched_min_val_2_u32(subport->tb_credits, subport->tb_size); + subport->tb_time += n_periods * subport->tb_period; + + /* Pipe TB */ + n_periods = (port->time - pipe->tb_time) / params->tb_period; + pipe->tb_credits += n_periods * params->tb_credits_per_period; + pipe->tb_credits = rte_sched_min_val_2_u32(pipe->tb_credits, params->tb_size); + pipe->tb_time += n_periods * params->tb_period; + + /* Subport TCs */ + if (unlikely(port->time >= subport->tc_time)) { + subport->tc_credits[0] = subport->tc_credits_per_period[0]; + subport->tc_credits[1] = subport->tc_credits_per_period[1]; + subport->tc_credits[2] = subport->tc_credits_per_period[2]; + subport->tc_credits[3] = subport->tc_credits_per_period[3]; + subport->tc_time = port->time + subport->tc_period; + } + + /* Pipe TCs */ + if (unlikely(port->time >= pipe->tc_time)) { + pipe->tc_credits[0] = params->tc_credits_per_period[0]; + pipe->tc_credits[1] = params->tc_credits_per_period[1]; + pipe->tc_credits[2] = params->tc_credits_per_period[2]; + pipe->tc_credits[3] = params->tc_credits_per_period[3]; + pipe->tc_time = port->time + params->tc_period; + } +} + +#else + +static inline void +grinder_credits_update(struct rte_sched_port *port, uint32_t pos) +{ + struct rte_sched_grinder *grinder = port->grinder + pos; + struct rte_sched_subport 
*subport = grinder->subport; + struct rte_sched_pipe *pipe = grinder->pipe; + struct rte_sched_pipe_profile *params = grinder->pipe_params; + uint64_t n_periods; + + /* Subport TB */ + n_periods = (port->time - subport->tb_time) / subport->tb_period; + subport->tb_credits += n_periods * subport->tb_credits_per_period; + subport->tb_credits = rte_sched_min_val_2_u32(subport->tb_credits, subport->tb_size); + subport->tb_time += n_periods * subport->tb_period; + + /* Pipe TB */ + n_periods = (port->time - pipe->tb_time) / params->tb_period; + pipe->tb_credits += n_periods * params->tb_credits_per_period; + pipe->tb_credits = rte_sched_min_val_2_u32(pipe->tb_credits, params->tb_size); + pipe->tb_time += n_periods * params->tb_period; + + /* Subport TCs */ + if (unlikely(port->time >= subport->tc_ov_time)) { + uint64_t n_ov_periods; + + if (unlikely(port->time >= subport->tc_time)) { + subport->tc_credits[0] = subport->tc_credits_per_period[0]; + subport->tc_credits[1] = subport->tc_credits_per_period[1]; + subport->tc_credits[2] = subport->tc_credits_per_period[2]; + subport->tc_credits[3] = subport->tc_credits_per_period[3]; + + subport->tc_time = port->time + subport->tc_period; + } + + n_ov_periods = (subport->tc_time - port->time + subport->tc_ov_period - 1) / subport->tc_ov_period; + + subport->tc_ov_credits[0] = subport->tc_credits[0] / (n_ov_periods * subport->tc_ov_n[0]); + subport->tc_ov_credits[1] = subport->tc_credits[1] / (n_ov_periods * subport->tc_ov_n[1]); + subport->tc_ov_credits[2] = subport->tc_credits[2] / (n_ov_periods * subport->tc_ov_n[2]); + subport->tc_ov_credits[3] = subport->tc_credits[3] / (n_ov_periods * subport->tc_ov_n[3]); + + subport->tc_ov_time = port->time + subport->tc_ov_period; + subport->tc_ov_period_id ++; + } + + /* Pipe TCs */ + if (unlikely(port->time >= pipe->tc_time)) { + pipe->tc_credits[0] = params->tc_credits_per_period[0]; + pipe->tc_credits[1] = params->tc_credits_per_period[1]; + pipe->tc_credits[2] = params->tc_credits_per_period[2]; + pipe->tc_credits[3] = params->tc_credits_per_period[3]; + pipe->tc_time = port->time + params->tc_period; + } + if (unlikely(pipe->tc_ov_period_id != subport->tc_ov_period_id)) { + uint32_t pipe_tc_ov_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; + uint32_t tc_mask[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; + uint32_t mask[] = {UINT32_MAX, 0}; + + tc_mask[0] = mask[subport->tc_ov[0]]; + tc_mask[1] = mask[subport->tc_ov[1]]; + tc_mask[2] = mask[subport->tc_ov[2]]; + tc_mask[3] = mask[subport->tc_ov[3]]; + + pipe_tc_ov_credits[0] = subport->tc_ov_credits[0] * params->tc_ov_weight[0]; + pipe_tc_ov_credits[1] = subport->tc_ov_credits[1] * params->tc_ov_weight[1]; + pipe_tc_ov_credits[2] = subport->tc_ov_credits[2] * params->tc_ov_weight[2]; + pipe_tc_ov_credits[3] = subport->tc_ov_credits[3] * params->tc_ov_weight[3]; + + pipe->tc_ov_credits[0] = (tc_mask[0] & pipe->tc_credits[0]) | ((~ tc_mask[0]) & pipe_tc_ov_credits[0]); + pipe->tc_ov_credits[1] = (tc_mask[1] & pipe->tc_credits[1]) | ((~ tc_mask[1]) & pipe_tc_ov_credits[1]); + pipe->tc_ov_credits[2] = (tc_mask[2] & pipe->tc_credits[2]) | ((~ tc_mask[2]) & pipe_tc_ov_credits[2]); + pipe->tc_ov_credits[3] = (tc_mask[3] & pipe->tc_credits[3]) | ((~ tc_mask[3]) & pipe_tc_ov_credits[3]); + + pipe->tc_ov_period_id = subport->tc_ov_period_id; + } +} + +#endif /* RTE_SCHED_TS_CREDITS_UPDATE, RTE_SCHED_SUBPORT_TC_OV */ + +#ifndef RTE_SCHED_SUBPORT_TC_OV + +static inline int +grinder_credits_check(struct rte_sched_port *port, uint32_t pos) +{ + struct rte_sched_grinder *grinder = 
port->grinder + pos; + struct rte_sched_subport *subport = grinder->subport; + struct rte_sched_pipe *pipe = grinder->pipe; + struct rte_mbuf *pkt = grinder->pkt; + uint32_t tc_index = grinder->tc_index; + uint32_t pkt_len = pkt->pkt.pkt_len + port->frame_overhead; + int enough_credits; + + /* Check queue credits */ + enough_credits = (pkt_len <= subport->tb_credits) && + (pkt_len <= subport->tc_credits[tc_index]) && + (pkt_len <= pipe->tb_credits) && + (pkt_len <= pipe->tc_credits[tc_index]); + + if (!enough_credits) { + return 0; + } + + /* Update port credits */ + subport->tb_credits -= pkt_len; + subport->tc_credits[tc_index] -= pkt_len; + pipe->tb_credits -= pkt_len; + pipe->tc_credits[tc_index] -= pkt_len; + + return 1; +} + +#else + +static inline int +grinder_credits_check(struct rte_sched_port *port, uint32_t pos) +{ + struct rte_sched_grinder *grinder = port->grinder + pos; + struct rte_sched_subport *subport = grinder->subport; + struct rte_sched_pipe *pipe = grinder->pipe; + struct rte_mbuf *pkt = grinder->pkt; + uint32_t tc_index = grinder->tc_index; + uint32_t pkt_len = pkt->pkt.pkt_len + port->frame_overhead; + uint32_t subport_tb_credits = subport->tb_credits; + uint32_t subport_tc_credits = subport->tc_credits[tc_index]; + uint32_t pipe_tb_credits = pipe->tb_credits; + uint32_t pipe_tc_credits = pipe->tc_credits[tc_index]; + uint32_t pipe_tc_ov_credits = pipe->tc_ov_credits[tc_index]; + int enough_credits; + + /* Check pipe and subport credits */ + enough_credits = (pkt_len <= subport_tb_credits) && + (pkt_len <= subport_tc_credits) && + (pkt_len <= pipe_tb_credits) && + (pkt_len <= pipe_tc_credits) && + (pkt_len <= pipe_tc_ov_credits); + + if (!enough_credits) { + return 0; + } + + /* Update pipe and subport credits */ + subport->tb_credits -= pkt_len; + subport->tc_credits[tc_index] -= pkt_len; + pipe->tb_credits -= pkt_len; + pipe->tc_credits[tc_index] -= pkt_len; + pipe->tc_ov_credits[tc_index] -= pkt_len; + + return 1; +} + +#endif /* RTE_SCHED_SUBPORT_TC_OV */ + +static inline int +grinder_schedule(struct rte_sched_port *port, uint32_t pos) +{ + struct rte_sched_grinder *grinder = port->grinder + pos; + struct rte_sched_queue *queue = grinder->queue[grinder->qpos]; + struct rte_mbuf *pkt = grinder->pkt; + uint32_t pkt_len = pkt->pkt.pkt_len + port->frame_overhead; + +#if RTE_SCHED_TS_CREDITS_CHECK + if (!grinder_credits_check(port, pos)) { + return 0; + } +#endif + + /* Advance port time */ + port->time += pkt_len; + + /* Send packet */ + port->pkts_out[port->n_pkts_out ++] = pkt; + queue->qr ++; + grinder->wrr_tokens[grinder->qpos] += pkt_len * grinder->wrr_cost[grinder->qpos]; + if (queue->qr == queue->qw) { + uint32_t qindex = grinder->qindex[grinder->qpos]; + + rte_bitmap_clear(&port->bmp, qindex); + grinder->qmask &= ~(1 << grinder->qpos); + grinder->wrr_mask[grinder->qpos] = 0; + rte_sched_port_set_queue_empty_timestamp(port, qindex); + } + + /* Reset pipe loop detection */ + port->pipe_loop = RTE_SCHED_PIPE_INVALID; + grinder->productive = 1; + + return 1; +} + +#if RTE_SCHED_OPTIMIZATIONS + +static inline int +grinder_pipe_exists(struct rte_sched_port *port, uint32_t base_pipe) +{ + __m128i index = _mm_set1_epi32 (base_pipe); + __m128i pipes = _mm_load_si128((__m128i *)port->grinder_base_bmp_pos); + __m128i res = _mm_cmpeq_epi32(pipes, index); + pipes = _mm_load_si128((__m128i *)(port->grinder_base_bmp_pos + 4)); + pipes = _mm_cmpeq_epi32(pipes, index); + res = _mm_or_si128(res, pipes); + + if (_mm_testz_si128(res, res)) + return 0; + + return 1; +} + +#else 
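+
+/* Scalar fallback: a linear scan over the RTE_SCHED_PORT_N_GRINDERS grinder
+ * base positions. The SSE4.1 variant above performs the same membership check
+ * with two 128-bit compares instead of a loop. */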
+ +static inline int +grinder_pipe_exists(struct rte_sched_port *port, uint32_t base_pipe) +{ + uint32_t i; + + for (i = 0; i < RTE_SCHED_PORT_N_GRINDERS; i ++) { + if (port->grinder_base_bmp_pos[i] == base_pipe) { + return 1; + } + } + + return 0; +} + +#endif /* RTE_SCHED_OPTIMIZATIONS */ + +static inline void +grinder_pcache_populate(struct rte_sched_port *port, uint32_t pos, uint32_t bmp_pos, uint64_t bmp_slab) +{ + struct rte_sched_grinder *grinder = port->grinder + pos; + uint16_t w[4]; + + grinder->pcache_w = 0; + grinder->pcache_r = 0; + + w[0] = (uint16_t) bmp_slab; + w[1] = (uint16_t) (bmp_slab >> 16); + w[2] = (uint16_t) (bmp_slab >> 32); + w[3] = (uint16_t) (bmp_slab >> 48); + + grinder->pcache_qmask[grinder->pcache_w] = w[0]; + grinder->pcache_qindex[grinder->pcache_w] = bmp_pos; + grinder->pcache_w += (w[0] != 0); + + grinder->pcache_qmask[grinder->pcache_w] = w[1]; + grinder->pcache_qindex[grinder->pcache_w] = bmp_pos + 16; + grinder->pcache_w += (w[1] != 0); + + grinder->pcache_qmask[grinder->pcache_w] = w[2]; + grinder->pcache_qindex[grinder->pcache_w] = bmp_pos + 32; + grinder->pcache_w += (w[2] != 0); + + grinder->pcache_qmask[grinder->pcache_w] = w[3]; + grinder->pcache_qindex[grinder->pcache_w] = bmp_pos + 48; + grinder->pcache_w += (w[3] != 0); +} + +static inline void +grinder_tccache_populate(struct rte_sched_port *port, uint32_t pos, uint32_t qindex, uint16_t qmask) +{ + struct rte_sched_grinder *grinder = port->grinder + pos; + uint8_t b[4]; + + grinder->tccache_w = 0; + grinder->tccache_r = 0; + + b[0] = (uint8_t) (qmask & 0xF); + b[1] = (uint8_t) ((qmask >> 4) & 0xF); + b[2] = (uint8_t) ((qmask >> 8) & 0xF); + b[3] = (uint8_t) ((qmask >> 12) & 0xF); + + grinder->tccache_qmask[grinder->tccache_w] = b[0]; + grinder->tccache_qindex[grinder->tccache_w] = qindex; + grinder->tccache_w += (b[0] != 0); + + grinder->tccache_qmask[grinder->tccache_w] = b[1]; + grinder->tccache_qindex[grinder->tccache_w] = qindex + 4; + grinder->tccache_w += (b[1] != 0); + + grinder->tccache_qmask[grinder->tccache_w] = b[2]; + grinder->tccache_qindex[grinder->tccache_w] = qindex + 8; + grinder->tccache_w += (b[2] != 0); + + grinder->tccache_qmask[grinder->tccache_w] = b[3]; + grinder->tccache_qindex[grinder->tccache_w] = qindex + 12; + grinder->tccache_w += (b[3] != 0); +} + +static inline int +grinder_next_tc(struct rte_sched_port *port, uint32_t pos) +{ + struct rte_sched_grinder *grinder = port->grinder + pos; + struct rte_mbuf **qbase; + uint32_t qindex; + uint16_t qsize; + + if (grinder->tccache_r == grinder->tccache_w) { + return 0; + } + + qindex = grinder->tccache_qindex[grinder->tccache_r]; + qbase = rte_sched_port_qbase(port, qindex); + qsize = rte_sched_port_qsize(port, qindex); + + grinder->tc_index = (qindex >> 2) & 0x3; + grinder->qmask = grinder->tccache_qmask[grinder->tccache_r]; + grinder->qsize = qsize; + + grinder->qindex[0] = qindex; + grinder->qindex[1] = qindex + 1; + grinder->qindex[2] = qindex + 2; + grinder->qindex[3] = qindex + 3; + + grinder->queue[0] = port->queue + qindex; + grinder->queue[1] = port->queue + qindex + 1; + grinder->queue[2] = port->queue + qindex + 2; + grinder->queue[3] = port->queue + qindex + 3; + + grinder->qbase[0] = qbase; + grinder->qbase[1] = qbase + qsize; + grinder->qbase[2] = qbase + 2 * qsize; + grinder->qbase[3] = qbase + 3 * qsize; + + grinder->tccache_r ++; + return 1; +} + +static inline int +grinder_next_pipe(struct rte_sched_port *port, uint32_t pos) +{ + struct rte_sched_grinder *grinder = port->grinder + pos; + uint32_t 
+
+static inline int
+grinder_next_pipe(struct rte_sched_port *port, uint32_t pos)
+{
+	struct rte_sched_grinder *grinder = port->grinder + pos;
+	uint32_t pipe_qindex;
+	uint16_t pipe_qmask;
+
+	if (grinder->pcache_r < grinder->pcache_w) {
+		pipe_qmask = grinder->pcache_qmask[grinder->pcache_r];
+		pipe_qindex = grinder->pcache_qindex[grinder->pcache_r];
+		grinder->pcache_r ++;
+	} else {
+		uint64_t bmp_slab = 0;
+		uint32_t bmp_pos = 0;
+
+		/* Get another non-empty pipe group */
+		if (unlikely(rte_bitmap_scan(&port->bmp, &bmp_pos, &bmp_slab) <= 0)) {
+			return 0;
+		}
+
+#if RTE_SCHED_DEBUG
+		debug_check_queue_slab(port, bmp_pos, bmp_slab);
+#endif
+
+		/* Return if pipe group already in one of the other grinders */
+		port->grinder_base_bmp_pos[pos] = RTE_SCHED_BMP_POS_INVALID;
+		if (unlikely(grinder_pipe_exists(port, bmp_pos))) {
+			return 0;
+		}
+		port->grinder_base_bmp_pos[pos] = bmp_pos;
+
+		/* Install new pipe group into grinder's pipe cache */
+		grinder_pcache_populate(port, pos, bmp_pos, bmp_slab);
+
+		pipe_qmask = grinder->pcache_qmask[0];
+		pipe_qindex = grinder->pcache_qindex[0];
+		grinder->pcache_r = 1;
+	}
+
+	/* Install new pipe in the grinder */
+	grinder->pindex = pipe_qindex >> 4;
+	grinder->subport = port->subport + (grinder->pindex / port->n_pipes_per_subport);
+	grinder->pipe = port->pipe + grinder->pindex;
+	grinder->pipe_params = NULL; /* to be set after the pipe structure is prefetched */
+	grinder->productive = 0;
+
+	grinder_tccache_populate(port, pos, pipe_qindex, pipe_qmask);
+	grinder_next_tc(port, pos);
+
+	/* Check for pipe exhaustion */
+	if (grinder->pindex == port->pipe_loop) {
+		port->pipe_exhaustion = 1;
+		port->pipe_loop = RTE_SCHED_PIPE_INVALID;
+	}
+
+	return 1;
+}
+
+#if RTE_SCHED_WRR == 0
+
+#define grinder_wrr_load(a,b)
+
+#define grinder_wrr_store(a,b)
+
+static inline void
+grinder_wrr(struct rte_sched_port *port, uint32_t pos)
+{
+	struct rte_sched_grinder *grinder = port->grinder + pos;
+	uint64_t slab = grinder->qmask;
+
+	if (rte_bsf64(slab, &grinder->qpos) == 0) {
+		rte_panic("grinder wrr\n");
+	}
+}
+
+#elif RTE_SCHED_WRR == 1
+
+static inline void
+grinder_wrr_load(struct rte_sched_port *port, uint32_t pos)
+{
+	struct rte_sched_grinder *grinder = port->grinder + pos;
+	struct rte_sched_pipe *pipe = grinder->pipe;
+	struct rte_sched_pipe_profile *pipe_params = grinder->pipe_params;
+	uint32_t tc_index = grinder->tc_index;
+	uint32_t qmask = grinder->qmask;
+	uint32_t qindex;
+
+	qindex = tc_index * 4;
+
+	grinder->wrr_tokens[0] = ((uint16_t) pipe->wrr_tokens[qindex]) << RTE_SCHED_WRR_SHIFT;
+	grinder->wrr_tokens[1] = ((uint16_t) pipe->wrr_tokens[qindex + 1]) << RTE_SCHED_WRR_SHIFT;
+	grinder->wrr_tokens[2] = ((uint16_t) pipe->wrr_tokens[qindex + 2]) << RTE_SCHED_WRR_SHIFT;
+	grinder->wrr_tokens[3] = ((uint16_t) pipe->wrr_tokens[qindex + 3]) << RTE_SCHED_WRR_SHIFT;
+
+	grinder->wrr_mask[0] = (qmask & 0x1) * 0xFFFF;
+	grinder->wrr_mask[1] = ((qmask >> 1) & 0x1) * 0xFFFF;
+	grinder->wrr_mask[2] = ((qmask >> 2) & 0x1) * 0xFFFF;
+	grinder->wrr_mask[3] = ((qmask >> 3) & 0x1) * 0xFFFF;
+
+	grinder->wrr_cost[0] = pipe_params->wrr_cost[qindex];
+	grinder->wrr_cost[1] = pipe_params->wrr_cost[qindex + 1];
+	grinder->wrr_cost[2] = pipe_params->wrr_cost[qindex + 2];
+	grinder->wrr_cost[3] = pipe_params->wrr_cost[qindex + 3];
+}
+
+static inline void
+grinder_wrr_store(struct rte_sched_port *port, uint32_t pos)
+{
+	struct rte_sched_grinder *grinder = port->grinder + pos;
+	struct rte_sched_pipe *pipe = grinder->pipe;
+	uint32_t tc_index = grinder->tc_index;
+	uint32_t qindex;
+
+	qindex = tc_index * 4;
+
+	pipe->wrr_tokens[qindex] = (uint8_t) ((grinder->wrr_tokens[0] & grinder->wrr_mask[0]) >> RTE_SCHED_WRR_SHIFT);
+	pipe->wrr_tokens[qindex + 1] = (uint8_t) ((grinder->wrr_tokens[1] & grinder->wrr_mask[1]) >> RTE_SCHED_WRR_SHIFT);
+	pipe->wrr_tokens[qindex + 2] = (uint8_t) ((grinder->wrr_tokens[2] & grinder->wrr_mask[2]) >> RTE_SCHED_WRR_SHIFT);
+	pipe->wrr_tokens[qindex + 3] = (uint8_t) ((grinder->wrr_tokens[3] & grinder->wrr_mask[3]) >> RTE_SCHED_WRR_SHIFT);
+}
+
+static inline void
+grinder_wrr(struct rte_sched_port *port, uint32_t pos)
+{
+	struct rte_sched_grinder *grinder = port->grinder + pos;
+	uint16_t wrr_tokens_min;
+
+	grinder->wrr_tokens[0] |= ~grinder->wrr_mask[0];
+	grinder->wrr_tokens[1] |= ~grinder->wrr_mask[1];
+	grinder->wrr_tokens[2] |= ~grinder->wrr_mask[2];
+	grinder->wrr_tokens[3] |= ~grinder->wrr_mask[3];
+
+	grinder->qpos = rte_min_pos_4_u16(grinder->wrr_tokens);
+	wrr_tokens_min = grinder->wrr_tokens[grinder->qpos];
+
+	grinder->wrr_tokens[0] -= wrr_tokens_min;
+	grinder->wrr_tokens[1] -= wrr_tokens_min;
+	grinder->wrr_tokens[2] -= wrr_tokens_min;
+	grinder->wrr_tokens[3] -= wrr_tokens_min;
+}
+
+#else
+
+#error Invalid value for RTE_SCHED_WRR
+
+#endif /* RTE_SCHED_WRR */
+
+#define grinder_evict(port, pos)
+
+static inline void
+grinder_prefetch_pipe(struct rte_sched_port *port, uint32_t pos)
+{
+	struct rte_sched_grinder *grinder = port->grinder + pos;
+
+	rte_prefetch0(grinder->pipe);
+	rte_prefetch0(grinder->queue[0]);
+}
+
+static inline void
+grinder_prefetch_tc_queue_arrays(struct rte_sched_port *port, uint32_t pos)
+{
+	struct rte_sched_grinder *grinder = port->grinder + pos;
+	uint16_t qsize, qr[4];
+
+	qsize = grinder->qsize;
+	qr[0] = grinder->queue[0]->qr & (qsize - 1);
+	qr[1] = grinder->queue[1]->qr & (qsize - 1);
+	qr[2] = grinder->queue[2]->qr & (qsize - 1);
+	qr[3] = grinder->queue[3]->qr & (qsize - 1);
+
+	rte_prefetch0(grinder->qbase[0] + qr[0]);
+	rte_prefetch0(grinder->qbase[1] + qr[1]);
+
+	grinder_wrr_load(port, pos);
+	grinder_wrr(port, pos);
+
+	rte_prefetch0(grinder->qbase[2] + qr[2]);
+	rte_prefetch0(grinder->qbase[3] + qr[3]);
+}
+
+static inline void
+grinder_prefetch_mbuf(struct rte_sched_port *port, uint32_t pos)
+{
+	struct rte_sched_grinder *grinder = port->grinder + pos;
+	uint32_t qpos = grinder->qpos;
+	struct rte_mbuf **qbase = grinder->qbase[qpos];
+	uint16_t qsize = grinder->qsize;
+	uint16_t qr = grinder->queue[qpos]->qr & (qsize - 1);
+
+	grinder->pkt = qbase[qr];
+	rte_prefetch0(grinder->pkt);
+
+	if (unlikely((qr & 0x7) == 7)) {
+		uint16_t qr_next = (grinder->queue[qpos]->qr + 1) & (qsize - 1);
+
+		rte_prefetch0(qbase + qr_next);
+	}
+}
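/* [Editor's illustration, not part of the patch] One grinder_wrr() step on
 * hypothetical token values: the queue with the fewest tokens wins, and the
 * winner's token count is subtracted from all four lanes. Inactive (masked
 * out) queues are saturated to 0xFFFF so they never win. Relies only on
 * rte_min_pos_4_u16() from rte_sched_common.h; example_wrr_pick() is not
 * part of the library. */
static inline uint32_t
example_wrr_pick(uint16_t tokens[4], uint16_t mask[4])
{
	uint32_t i, qpos;
	uint16_t min;

	for (i = 0; i < 4; i ++)
		tokens[i] |= (uint16_t) ~mask[i];  /* inactive lane -> 0xFFFF */

	/* e.g. tokens {40, 10, 70, 0xFFFF} -> winner is queue 1 */
	qpos = rte_min_pos_4_u16(tokens);
	min = tokens[qpos];
	for (i = 0; i < 4; i ++)
		tokens[i] -= min;                  /* winner rebased to zero */

	/* grinder_schedule() later adds pkt_len * wrr_cost to the winner */
	return qpos;
}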
+
+static inline uint32_t
+grinder_handle(struct rte_sched_port *port, uint32_t pos)
+{
+	struct rte_sched_grinder *grinder = port->grinder + pos;
+
+	switch (grinder->state) {
+	case e_GRINDER_PREFETCH_PIPE:
+	{
+		if (grinder_next_pipe(port, pos)) {
+			grinder_prefetch_pipe(port, pos);
+			port->busy_grinders ++;
+
+			grinder->state = e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS;
+			return 0;
+		}
+
+		return 0;
+	}
+
+	case e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS:
+	{
+		struct rte_sched_pipe *pipe = grinder->pipe;
+
+		grinder->pipe_params = port->pipe_profiles + pipe->profile;
+		grinder_prefetch_tc_queue_arrays(port, pos);
+		grinder_credits_update(port, pos);
+
+		grinder->state = e_GRINDER_PREFETCH_MBUF;
+		return 0;
+	}
+
+	case e_GRINDER_PREFETCH_MBUF:
+	{
+		grinder_prefetch_mbuf(port, pos);
+
+		grinder->state = e_GRINDER_READ_MBUF;
+		return 0;
+	}
+
+	case e_GRINDER_READ_MBUF:
+	{
+		uint32_t result = 0;
+
+		result = grinder_schedule(port, pos);
+
+		/* Look for next packet within the same TC */
+		if (result && grinder->qmask) {
+			grinder_wrr(port, pos);
+			grinder_prefetch_mbuf(port, pos);
+
+			return 1;
+		}
+		grinder_wrr_store(port, pos);
+
+		/* Look for another active TC within same pipe */
+		if (grinder_next_tc(port, pos)) {
+			grinder_prefetch_tc_queue_arrays(port, pos);
+
+			grinder->state = e_GRINDER_PREFETCH_MBUF;
+			return result;
+		}
+		if ((grinder->productive == 0) && (port->pipe_loop == RTE_SCHED_PIPE_INVALID)) {
+			port->pipe_loop = grinder->pindex;
+		}
+		grinder_evict(port, pos);
+
+		/* Look for another active pipe */
+		if (grinder_next_pipe(port, pos)) {
+			grinder_prefetch_pipe(port, pos);
+
+			grinder->state = e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS;
+			return result;
+		}
+
+		/* No active pipe found */
+		port->busy_grinders --;
+
+		grinder->state = e_GRINDER_PREFETCH_PIPE;
+		return result;
+	}
+
+	default:
+		rte_panic("Algorithmic error (invalid state)\n");
+		return 0;
+	}
+}
+
+static inline void
+rte_sched_port_time_resync(struct rte_sched_port *port)
+{
+	uint64_t cycles = rte_get_tsc_cycles();
+	uint64_t cycles_diff = cycles - port->time_cpu_cycles;
+	double bytes_diff = ((double) cycles_diff) / port->cycles_per_byte;
+
+	/* Advance port time */
+	port->time_cpu_cycles = cycles;
+	port->time_cpu_bytes += (uint64_t) bytes_diff;
+	if (port->time < port->time_cpu_bytes) {
+		port->time = port->time_cpu_bytes;
+	}
+
+	/* Reset pipe loop detection */
+	port->pipe_loop = RTE_SCHED_PIPE_INVALID;
+}
+
+static inline int
+rte_sched_port_exceptions(struct rte_sched_port *port)
+{
+	int exceptions;
+
+	/* Check if any exception flag is set */
+	exceptions = (port->busy_grinders == 0) ||
+		(port->pipe_exhaustion == 1);
+
+	/* Clear exception flags */
+	port->pipe_exhaustion = 0;
+
+	return exceptions;
+}
+
+int
+rte_sched_port_dequeue(struct rte_sched_port *port, struct rte_mbuf **pkts, uint32_t n_pkts)
+{
+	uint32_t i, count;
+
+	port->pkts_out = pkts;
+	port->n_pkts_out = 0;
+
+	rte_sched_port_time_resync(port);
+
+	/* Take each queue in the grinder one step further */
+	for (i = 0, count = 0; ; i ++) {
+		count += grinder_handle(port, i & (RTE_SCHED_PORT_N_GRINDERS - 1));
+		if ((count == n_pkts) || rte_sched_port_exceptions(port)) {
+			break;
+		}
+	}
+
+	return count;
+}
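Editor's note: the cycles-to-bytes conversion in rte_sched_port_time_resync() above is easier to see with concrete numbers. The sketch below is illustrative only; the 2.5 GHz TSC frequency and the 1.25 GB/s port rate (10 GbE) are assumed example values, and cycles_per_byte is precomputed the same way a port configuration would derive it.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	double tsc_hz = 2.5e9;         /* assumed CPU timestamp counter frequency */
	double rate = 1.25e9;          /* assumed port rate, bytes per second (10 GbE) */
	double cycles_per_byte = tsc_hz / rate;        /* = 2.0 cycles per byte-time */

	uint64_t cycles_diff = 10000;  /* TSC cycles elapsed since the last resync */
	uint64_t bytes_diff = (uint64_t) (((double) cycles_diff) / cycles_per_byte);

	/* 10000 cycles / 2 cycles-per-byte = 5000 byte-times: the port may
	 * advance its time reference by up to 5000 bytes */
	printf("%u byte-times elapsed\n", (unsigned) bytes_diff);
	return 0;
}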
diff --git a/lib/librte_sched/rte_sched.h b/lib/librte_sched/rte_sched.h
new file mode 100644
index 0000000000..7b49248b59
--- /dev/null
+++ b/lib/librte_sched/rte_sched.h
@@ -0,0 +1,446 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ *   * Neither the name of Intel Corporation nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef __INCLUDE_RTE_SCHED_H__
+#define __INCLUDE_RTE_SCHED_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Hierarchical Scheduler
+ *
+ * The hierarchical scheduler prioritizes the transmission of packets from different
+ * users and traffic classes according to the Service Level Agreements (SLAs) defined
+ * for the current network node.
+ *
+ * The scheduler supports thousands of packet queues grouped under a 5-level hierarchy:
+ *     1. Port:
+ *         - Typical usage: output Ethernet port;
+ *         - Multiple ports are scheduled in round robin order with equal priority;
+ *     2. Subport:
+ *         - Typical usage: group of users;
+ *         - Traffic shaping using the token bucket algorithm (one bucket per subport);
+ *         - Upper limit enforced per traffic class at subport level;
+ *         - Lower priority traffic classes able to reuse subport bandwidth currently
+ *           unused by higher priority traffic classes of the same subport;
+ *         - When any subport traffic class is oversubscribed (a configuration-time
+ *           event), the bandwidth used by subport member pipes with high demand for
+ *           that traffic class is truncated to a dynamically adjusted value, with no
+ *           impact on low-demand pipes;
+ *     3. Pipe:
+ *         - Typical usage: individual user/subscriber;
+ *         - Traffic shaping using the token bucket algorithm (one bucket per pipe);
+ *     4. Traffic class:
+ *         - Traffic classes of the same pipe handled in strict priority order;
+ *         - Upper limit enforced per traffic class at the pipe level;
+ *         - Lower priority traffic classes able to reuse pipe bandwidth currently
+ *           unused by higher priority traffic classes of the same pipe;
+ *     5. Queue:
+ *         - Typical usage: queue hosting packets from one or multiple connections
+ *           of the same traffic class belonging to the same user;
+ *         - Weighted Round Robin (WRR) is used to service the queues within the same
+ *           pipe traffic class.
+ *
+ ***/
+
+#include <sys/types.h>
+
+#include <rte_mbuf.h>
+#include <rte_meter.h>
+
+/** Random Early Detection (RED) */
+#ifdef RTE_SCHED_RED
+#include "rte_red.h"
+#endif
+
+/** Number of traffic classes per pipe (as well as subport). Cannot be changed. */
+#define RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE    4
+
+/** Number of queues per pipe traffic class. Cannot be changed. */
+#define RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS    4
+
+/** Number of queues per pipe. */
+#define RTE_SCHED_QUEUES_PER_PIPE             \
+	(RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE *     \
+	RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS)
+
+/** Maximum number of pipe profiles that can be defined per port. Compile-time configurable. */
+#ifndef RTE_SCHED_PIPE_PROFILES_PER_PORT
+#define RTE_SCHED_PIPE_PROFILES_PER_PORT      256
+#endif
+
+/** Ethernet framing overhead. Overhead fields per Ethernet frame:
+ * 1. Preamble: 7 bytes;
+ * 2. Start of Frame Delimiter (SFD): 1 byte;
+ * 3. Frame Check Sequence (FCS): 4 bytes;
+ * 4. Inter Frame Gap (IFG): 12 bytes.
+ * The FCS is considered overhead only if not included in the packet length
+ * (field pkt.pkt_len of struct rte_mbuf). */
+#ifndef RTE_SCHED_FRAME_OVERHEAD_DEFAULT
+#define RTE_SCHED_FRAME_OVERHEAD_DEFAULT      24
+#endif
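/* [Editor's illustration, not part of the patch] A quick check of the default
 * overhead arithmetic above: preamble (7) + SFD (1) + FCS (4) + IFG (12) = 24
 * bytes. With the default, a packet whose pkt.pkt_len is 64 bytes consumes
 * 64 + 24 = 88 byte-times of scheduler credits. The function below is a
 * hypothetical helper, not part of the API. */
static inline uint32_t
example_credits_consumed(uint32_t pkt_len)
{
	return pkt_len + RTE_SCHED_FRAME_OVERHEAD_DEFAULT; /* 64 -> 88 */
}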
+
+/** Subport configuration parameters. Each rate is enforced internally as a budget of
+credits_per_period credits spent over a period, both measured in bytes, with one byte meaning
+the time duration associated with the transmission of one byte on the physical medium of the
+output port. The subport or subport traffic class rate (as a percentage of the output port
+rate) is therefore credits_per_period divided by period. One credit represents one byte. */
+struct rte_sched_subport_params {
+	/* Subport token bucket */
+	uint32_t tb_rate;                /**< Subport token bucket rate (measured in bytes per second) */
+	uint32_t tb_size;                /**< Subport token bucket size (measured in credits) */
+
+	/* Subport traffic classes */
+	uint32_t tc_rate[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; /**< Subport traffic class rates (measured in bytes per second) */
+	uint32_t tc_period;              /**< Enforcement period for traffic class rates (measured in milliseconds) */
+#ifdef RTE_SCHED_SUBPORT_TC_OV
+	uint32_t tc_ov_period;           /**< Enforcement period for traffic class oversubscription (measured in milliseconds) */
+#endif
+};
+
+/** Subport statistics */
+struct rte_sched_subport_stats {
+	/* Packets */
+	uint32_t n_pkts_tc[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; /**< Number of packets successfully written to current
+	                                 subport for each traffic class */
+	uint32_t n_pkts_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; /**< Number of packets dropped by the current
+	                                 subport for each traffic class due to subport queues being full or congested */
+
+	/* Bytes */
+	uint32_t n_bytes_tc[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; /**< Number of bytes successfully written to current
+	                                 subport for each traffic class */
+	uint32_t n_bytes_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; /**< Number of bytes dropped by the current
+	                                 subport for each traffic class due to subport queues being full or congested */
+};
+
+/** Pipe configuration parameters. Each rate is enforced internally as a budget of
+credits_per_period credits spent over a period, both measured in bytes, with one byte meaning
+the time duration associated with the transmission of one byte on the physical medium of the
+output port. The pipe or pipe traffic class rate (as a percentage of the output port rate) is
+therefore credits_per_period divided by period. One credit represents one byte.
*/
+struct rte_sched_pipe_params {
+	/* Pipe token bucket */
+	uint32_t tb_rate;                /**< Pipe token bucket rate (measured in bytes per second) */
+	uint32_t tb_size;                /**< Pipe token bucket size (measured in credits) */
+
+	/* Pipe traffic classes */
+	uint32_t tc_rate[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; /**< Pipe traffic class rates (measured in bytes per second) */
+	uint32_t tc_period;              /**< Enforcement period for pipe traffic class rates (measured in milliseconds) */
+#ifdef RTE_SCHED_SUBPORT_TC_OV
+	uint8_t tc_ov_weight[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; /**< Traffic class weights to be used for the
+	                                 current pipe in the event of subport traffic class oversubscription */
+#endif
+
+	/* Pipe queues */
+	uint8_t wrr_weights[RTE_SCHED_QUEUES_PER_PIPE]; /**< WRR weights for the queues of the current pipe */
+};
+
+/** Queue statistics */
+struct rte_sched_queue_stats {
+	/* Packets */
+	uint32_t n_pkts;                 /**< Number of packets successfully written to current queue */
+	uint32_t n_pkts_dropped;         /**< Number of packets dropped due to current queue being full or congested */
+
+	/* Bytes */
+	uint32_t n_bytes;                /**< Number of bytes successfully written to current queue */
+	uint32_t n_bytes_dropped;        /**< Number of bytes dropped due to current queue being full or congested */
+};
+
+/** Port configuration parameters. */
+struct rte_sched_port_params {
+	const char *name;                /**< Literal string to be associated to the current port scheduler instance */
+	int socket;                      /**< CPU socket ID where the memory for the port scheduler should be allocated */
+	uint32_t rate;                   /**< Output port rate (measured in bytes per second) */
+	uint32_t frame_overhead;         /**< Framing overhead per packet (measured in bytes) */
+	uint32_t n_subports_per_port;    /**< Number of subports for the current port scheduler instance */
+	uint32_t n_pipes_per_subport;    /**< Number of pipes for each port scheduler subport */
+	uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; /**< Packet queue size for each traffic class. All queues
+	                                 within the same pipe traffic class have the same size. Queues from
+	                                 different pipes serving the same traffic class have the same size. */
+	struct rte_sched_pipe_params *pipe_profiles; /**< Pipe profile table defined for the current port scheduler
+	                                 instance. Every pipe of the current port scheduler is configured
+	                                 using one of the profiles from this table. */
+	uint32_t n_pipe_profiles;        /**< Number of profiles in the pipe profile table */
+#ifdef RTE_SCHED_RED
+	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][e_RTE_METER_COLORS]; /**< RED parameters */
+#endif
+};
+
+/** Path through the scheduler hierarchy used by the scheduler enqueue operation to
+identify the destination queue for the current packet. Stored in the field pkt.hash.sched
+of struct rte_mbuf of each packet, typically written by the classification stage and read
+by scheduler enqueue. */
+struct rte_sched_port_hierarchy {
+	uint32_t queue:2;                /**< Queue ID (0 .. 3) */
+	uint32_t traffic_class:2;        /**< Traffic class ID (0 .. 3) */
+	uint32_t pipe:20;                /**< Pipe ID */
+	uint32_t subport:6;              /**< Subport ID */
+	uint32_t color:2;                /**< Color */
+};
+
+/*
+ * Configuration
+ *
+ ***/
+
+/**
+ * Hierarchical scheduler port configuration
+ *
+ * @param params
+ *   Port scheduler configuration parameter structure
+ * @return
+ *   Handle to port scheduler instance upon success or NULL otherwise.
+ */
+struct rte_sched_port *
+rte_sched_port_config(struct rte_sched_port_params *params);
+
+/**
+ * Hierarchical scheduler port free
+ *
+ * @param port
+ *   Handle to port scheduler instance
+ */
+void
+rte_sched_port_free(struct rte_sched_port *port);
+
+/**
+ * Hierarchical scheduler subport configuration
+ *
+ * @param port
+ *   Handle to port scheduler instance
+ * @param subport_id
+ *   Subport ID
+ * @param params
+ *   Subport configuration parameters
+ * @return
+ *   0 upon success, error code otherwise
+ */
+int
+rte_sched_subport_config(struct rte_sched_port *port,
+	uint32_t subport_id,
+	struct rte_sched_subport_params *params);
+
+/**
+ * Hierarchical scheduler pipe configuration
+ *
+ * @param port
+ *   Handle to port scheduler instance
+ * @param subport_id
+ *   Subport ID
+ * @param pipe_id
+ *   Pipe ID within subport
+ * @param pipe_profile
+ *   ID of port-level pre-configured pipe profile
+ * @return
+ *   0 upon success, error code otherwise
+ */
+int
+rte_sched_pipe_config(struct rte_sched_port *port,
+	uint32_t subport_id,
+	uint32_t pipe_id,
+	int32_t pipe_profile);
+
+/**
+ * Hierarchical scheduler memory footprint size per port
+ *
+ * @param params
+ *   Port scheduler configuration parameter structure
+ * @return
+ *   Memory footprint size in bytes upon success, 0 otherwise
+ */
+uint32_t
+rte_sched_port_get_memory_footprint(struct rte_sched_port_params *params);
+
+/*
+ * Statistics
+ *
+ ***/
+
+/**
+ * Hierarchical scheduler subport statistics read
+ *
+ * @param port
+ *   Handle to port scheduler instance
+ * @param subport_id
+ *   Subport ID
+ * @param stats
+ *   Pointer to pre-allocated subport statistics structure where the statistics
+ *   counters should be stored
+ * @param tc_ov
+ *   Pointer to pre-allocated 4-entry array where the oversubscription status for
+ *   each of the 4 subport traffic classes should be stored.
+ * @return
+ *   0 upon success, error code otherwise
+ */
+int
+rte_sched_subport_read_stats(struct rte_sched_port *port,
+	uint32_t subport_id,
+	struct rte_sched_subport_stats *stats,
+	uint32_t *tc_ov);
+
+/**
+ * Hierarchical scheduler queue statistics read
+ *
+ * @param port
+ *   Handle to port scheduler instance
+ * @param queue_id
+ *   Queue ID within port scheduler
+ * @param stats
+ *   Pointer to pre-allocated queue statistics structure where the statistics
+ *   counters should be stored
+ * @param qlen
+ *   Pointer to pre-allocated variable where the current queue length should be stored.
+ * @return
+ *   0 upon success, error code otherwise
+ */
+int
+rte_sched_queue_read_stats(struct rte_sched_port *port,
+	uint32_t queue_id,
+	struct rte_sched_queue_stats *stats,
+	uint16_t *qlen);
+
+/*
+ * Run-time
+ *
+ ***/
+
+/**
+ * Scheduler hierarchy path write to packet descriptor. Typically called by the
+ * packet classification stage.
+ *
+ * @param pkt
+ *   Packet descriptor handle
+ * @param subport
+ *   Subport ID
+ * @param pipe
+ *   Pipe ID within subport
+ * @param traffic_class
+ *   Traffic class ID within pipe (0 .. 3)
+ * @param queue
+ *   Queue ID within pipe traffic class (0 .. 3)
+ * @param color
+ *   Packet color (as produced by the metering stage)
+ */
+static inline void
+rte_sched_port_pkt_write(struct rte_mbuf *pkt,
+	uint32_t subport, uint32_t pipe, uint32_t traffic_class, uint32_t queue, enum rte_meter_color color)
+{
+	struct rte_sched_port_hierarchy *sched = (struct rte_sched_port_hierarchy *) &pkt->pkt.hash.sched;
+
+	sched->color = (uint32_t) color;
+	sched->subport = subport;
+	sched->pipe = pipe;
+	sched->traffic_class = traffic_class;
+	sched->queue = queue;
+}
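/* [Editor's illustration, not part of the patch] Typical classification-stage
 * usage of rte_sched_port_pkt_write(). The subport/pipe/traffic class/queue
 * values and the green color are assumed example inputs; a real classifier
 * would derive them from packet fields. */
static inline void
example_classify(struct rte_mbuf *pkt)
{
	rte_sched_port_pkt_write(pkt, 0 /* subport */, 3 /* pipe */,
		1 /* traffic class */, 2 /* queue */, e_RTE_METER_GREEN);
}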
+
+/**
+ * Scheduler hierarchy path read from packet descriptor (struct rte_mbuf). Typically
+ * called as part of the hierarchical scheduler enqueue operation. The subport,
+ * pipe, traffic class and queue parameters need to be pre-allocated by the caller.
+ *
+ * @param pkt
+ *   Packet descriptor handle
+ * @param subport
+ *   Subport ID
+ * @param pipe
+ *   Pipe ID within subport
+ * @param traffic_class
+ *   Traffic class ID within pipe (0 .. 3)
+ * @param queue
+ *   Queue ID within pipe traffic class (0 .. 3)
+ */
+static inline void
+rte_sched_port_pkt_read_tree_path(struct rte_mbuf *pkt, uint32_t *subport, uint32_t *pipe, uint32_t *traffic_class, uint32_t *queue)
+{
+	struct rte_sched_port_hierarchy *sched = (struct rte_sched_port_hierarchy *) &pkt->pkt.hash.sched;
+
+	*subport = sched->subport;
+	*pipe = sched->pipe;
+	*traffic_class = sched->traffic_class;
+	*queue = sched->queue;
+}
+
+/**
+ * Scheduler hierarchy path color read from packet descriptor (struct rte_mbuf).
+ *
+ * @param pkt
+ *   Packet descriptor handle
+ * @return
+ *   Packet color
+ */
+static inline enum rte_meter_color
+rte_sched_port_pkt_read_color(struct rte_mbuf *pkt)
+{
+	struct rte_sched_port_hierarchy *sched = (struct rte_sched_port_hierarchy *) &pkt->pkt.hash.sched;
+
+	return (enum rte_meter_color) sched->color;
+}
+
+/**
+ * Hierarchical scheduler port enqueue. Writes up to n_pkts to the port scheduler and
+ * returns the number of packets actually written. For each packet, the port scheduler
+ * queue to write the packet to is identified by reading the hierarchy path from the
+ * packet descriptor; if the queue is full or congested and the packet is not written
+ * to the queue, then the packet is automatically dropped without any action required
+ * from the caller.
+ *
+ * @param port
+ *   Handle to port scheduler instance
+ * @param pkts
+ *   Array storing the packet descriptor handles
+ * @param n_pkts
+ *   Number of packets to enqueue from the pkts array into the port scheduler
+ * @return
+ *   Number of packets successfully enqueued
+ */
+int
+rte_sched_port_enqueue(struct rte_sched_port *port, struct rte_mbuf **pkts, uint32_t n_pkts);
+
+/**
+ * Hierarchical scheduler port dequeue. Reads up to n_pkts from the port scheduler,
+ * stores them in the pkts array and returns the number of packets actually read.
+ * The pkts array needs to be pre-allocated by the caller with at least n_pkts entries.
+ *
+ * @param port
+ *   Handle to port scheduler instance
+ * @param pkts
+ *   Pre-allocated packet descriptor array where the packets dequeued from the port
+ *   scheduler should be stored
+ * @param n_pkts
+ *   Number of packets to dequeue from the port scheduler
+ * @return
+ *   Number of packets successfully dequeued and placed in the pkts array
+ */
+int
+rte_sched_port_dequeue(struct rte_sched_port *port, struct rte_mbuf **pkts, uint32_t n_pkts);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __INCLUDE_RTE_SCHED_H__ */
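Editor's note: to tie the rte_sched.h API together, here is a minimal usage sketch of the configuration and run-time paths. It is illustrative only: all rates, queue sizes and profile values are assumed example numbers rather than values taken from this patch, the optional traffic class oversubscription fields are omitted for brevity, and error handling is reduced to rte_panic().

#include <rte_mbuf.h>
#include <rte_debug.h>
#include <rte_sched.h>

#define N_PIPES    4096
#define BURST_SIZE 32

static struct rte_sched_port *port;

/* Single example pipe profile: 1 Mbyte/s token bucket, equal traffic class
 * rates, equal WRR weights (all values illustrative) */
static struct rte_sched_pipe_params pipe_profile = {
	.tb_rate = 1000000, .tb_size = 1000000,
	.tc_rate = {1000000, 1000000, 1000000, 1000000},
	.tc_period = 40,
	.wrr_weights = {1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1},
};

static struct rte_sched_subport_params subport_params = {
	.tb_rate = 1250000000, .tb_size = 1000000,
	.tc_rate = {1250000000, 1250000000, 1250000000, 1250000000},
	.tc_period = 10,
};

static void
sched_setup(void)
{
	struct rte_sched_port_params port_params = {
		.name = "port_0",
		.socket = 0,                       /* assumed CPU socket */
		.rate = 1250000000,                /* 10 GbE, in bytes per second */
		.frame_overhead = RTE_SCHED_FRAME_OVERHEAD_DEFAULT,
		.n_subports_per_port = 1,
		.n_pipes_per_subport = N_PIPES,
		.qsize = {64, 64, 64, 64},
		.pipe_profiles = &pipe_profile,
		.n_pipe_profiles = 1,
	};
	uint32_t pipe;

	port = rte_sched_port_config(&port_params);
	if (port == NULL)
		rte_panic("port scheduler configuration failed\n");

	if (rte_sched_subport_config(port, 0, &subport_params) != 0)
		rte_panic("subport configuration failed\n");

	for (pipe = 0; pipe < N_PIPES; pipe ++)
		if (rte_sched_pipe_config(port, 0, pipe, 0) != 0)
			rte_panic("pipe configuration failed\n");
}

/* Run-time: packets are assumed to already carry their hierarchy path,
 * written by the classification stage with rte_sched_port_pkt_write() */
static void
sched_run_once(struct rte_mbuf **rx_pkts, uint32_t n_rx,
	struct rte_mbuf **tx_pkts, uint32_t *n_tx)
{
	rte_sched_port_enqueue(port, rx_pkts, n_rx);
	*n_tx = (uint32_t) rte_sched_port_dequeue(port, tx_pkts, BURST_SIZE);
}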
diff --git a/lib/librte_sched/rte_sched_common.h b/lib/librte_sched/rte_sched_common.h
new file mode 100644
index 0000000000..dc76ad8675
--- /dev/null
+++ b/lib/librte_sched/rte_sched_common.h
@@ -0,0 +1,130 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ *   * Neither the name of Intel Corporation nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef __INCLUDE_RTE_SCHED_COMMON_H__
+#define __INCLUDE_RTE_SCHED_COMMON_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+#define __rte_aligned_16 __attribute__((__aligned__(16)))
+
+static inline uint32_t
+rte_sched_min_val_2_u32(uint32_t x, uint32_t y)
+{
+	return (x < y)? x : y;
+}
+
+#if 0
+static inline uint32_t
+rte_min_pos_4_u16(uint16_t *x)
+{
+	uint32_t pos0, pos1;
+
+	pos0 = (x[0] <= x[1])? 0 : 1;
+	pos1 = (x[2] <= x[3])? 2 : 3;
+
+	return (x[pos0] <= x[pos1])? pos0 : pos1;
+}
+
+#else
+
+/* Branch-free version: the comparisons below are intended to compile to
+ * conditional move (CMOV) instructions */
+static inline uint32_t
+rte_min_pos_4_u16(uint16_t *x)
+{
+	uint32_t pos0 = 0;
+	uint32_t pos1 = 2;
+
+	if (x[1] <= x[0]) pos0 = 1;
+	if (x[3] <= x[2]) pos1 = 3;
+	if (x[pos1] <= x[pos0]) pos0 = pos1;
+
+	return pos0;
+}
+
+#endif
+
+/*
+ * Compute the Greatest Common Divisor (GCD) of two numbers.
+ * This implementation uses Euclid's algorithm:
+ *    gcd(a, 0) = a
+ *    gcd(a, b) = gcd(b, a mod b)
+ *
+ */
+static inline uint32_t
+rte_get_gcd(uint32_t a, uint32_t b)
+{
+	uint32_t c;
+
+	if (a == 0)
+		return b;
+	if (b == 0)
+		return a;
+
+	if (a < b) {
+		c = a;
+		a = b;
+		b = c;
+	}
+
+	while (b != 0) {
+		c = a % b;
+		a = b;
+		b = c;
+	}
+
+	return a;
+}
+
+/*
+ * Compute the Least Common Multiple (LCM) of two numbers (the function keeps
+ * its original rte_get_lcd name). This implementation computes GCD first:
+ *    lcm(a, b) = (a * b) / gcd(a, b)
+ * Note that the product a * b may overflow uint32_t for large inputs.
+ *
+ */
+static inline uint32_t
+rte_get_lcd(uint32_t a, uint32_t b)
+{
+	return (a * b) / rte_get_gcd(a, b);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __INCLUDE_RTE_SCHED_COMMON_H__ */
diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index 8eb45d863d..4b802556f7 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -105,6 +105,12 @@ ifeq ($(CONFIG_RTE_LIBRTE_METER),y)
 LDLIBS += -lrte_meter
 endif
 
+ifeq ($(CONFIG_RTE_LIBRTE_SCHED),y)
+LDLIBS += -lrte_sched
+LDLIBS += -lm
+LDLIBS += -lrt
+endif
+
 LDLIBS += --start-group
 
 ifeq ($(CONFIG_RTE_LIBRTE_ETHER),y)
-- 
2.20.1
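Editor's note: a quick sanity check of the rte_sched_common.h helpers, with assumed example inputs. This is an illustrative sketch, not part of the patch.

#include <stdint.h>
#include <stdio.h>

#include "rte_sched_common.h"

int main(void)
{
	uint16_t tokens[4] = {40, 10, 70, 25};

	printf("gcd(12, 18) = %u\n", rte_get_gcd(12, 18));        /* 6 */
	printf("lcm(4, 6)   = %u\n", rte_get_lcd(4, 6));          /* 24 / 2 = 12 */
	printf("min pos     = %u\n", rte_min_pos_4_u16(tokens));  /* 1 (value 10) */
	return 0;
}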