sched: initial import
author: Intel <intel.com>
Mon, 3 Jun 2013 00:00:00 +0000 (00:00 +0000)
committer: Thomas Monjalon <thomas.monjalon@6wind.com>
Tue, 17 Sep 2013 12:09:21 +0000 (14:09 +0200)
Signed-off-by: Intel
32 files changed:
app/test/Makefile
app/test/autotest_data.py
app/test/commands.c
app/test/test.h
app/test/test_red.c [new file with mode: 0644]
app/test/test_sched.c [new file with mode: 0755]
config/defconfig_i686-default-linuxapp-gcc
config/defconfig_i686-default-linuxapp-icc
config/defconfig_x86_64-default-linuxapp-gcc
config/defconfig_x86_64-default-linuxapp-icc
examples/qos_sched/Makefile [new file with mode: 0755]
examples/qos_sched/app_thread.c [new file with mode: 0755]
examples/qos_sched/args.c [new file with mode: 0755]
examples/qos_sched/cfg_file.c [new file with mode: 0755]
examples/qos_sched/cfg_file.h [new file with mode: 0755]
examples/qos_sched/init.c [new file with mode: 0755]
examples/qos_sched/main.c [new file with mode: 0755]
examples/qos_sched/main.h [new file with mode: 0755]
examples/qos_sched/profile.cfg [new file with mode: 0644]
lib/Makefile
lib/librte_eal/common/include/rte_log.h
lib/librte_mbuf/rte_mbuf.h
lib/librte_sched/Makefile [new file with mode: 0644]
lib/librte_sched/rte_approx.c [new file with mode: 0644]
lib/librte_sched/rte_approx.h [new file with mode: 0644]
lib/librte_sched/rte_bitmap.h [new file with mode: 0644]
lib/librte_sched/rte_red.c [new file with mode: 0644]
lib/librte_sched/rte_red.h [new file with mode: 0644]
lib/librte_sched/rte_sched.c [new file with mode: 0644]
lib/librte_sched/rte_sched.h [new file with mode: 0644]
lib/librte_sched/rte_sched_common.h [new file with mode: 0644]
mk/rte.app.mk

index 6ba18e4..4fecdb6 100755 (executable)
@@ -85,6 +85,8 @@ SRCS-$(CONFIG_RTE_APP_TEST) += test_cmdline_ipaddr.c
 SRCS-$(CONFIG_RTE_APP_TEST) += test_cmdline_cirbuf.c
 SRCS-$(CONFIG_RTE_APP_TEST) += test_cmdline_string.c
 SRCS-$(CONFIG_RTE_APP_TEST) += test_cmdline_lib.c
+SRCS-$(CONFIG_RTE_APP_TEST) += test_red.c
+SRCS-$(CONFIG_RTE_APP_TEST) += test_sched.c
 SRCS-$(CONFIG_RTE_APP_TEST) += test_meter.c
 SRCS-$(CONFIG_RTE_APP_TEST) += test_pmac_pm.c
 SRCS-$(CONFIG_RTE_APP_TEST) += test_pmac_acl.c
index 9bd436b..f2f9965 100755 (executable)
@@ -271,7 +271,7 @@ parallel_test_group_list = [
 },
 {
        "Prefix":       "group_6",
-       "Memory" :      all_sockets(588),
+       "Memory" :      all_sockets(600),
        "Tests" :       
        [
                {
@@ -297,7 +297,13 @@ parallel_test_group_list = [
                 "Command" :    "prefetch_autotest",
                 "Func" :       default_autotest,
                 "Report" :     None,
-               },
+                },
+               {
+                "Name" :"Red autotest",
+                "Command" : "red_autotest",
+                "Func" :default_autotest,
+                "Report" :None,
+                },
        ]
 },
 {
@@ -317,6 +323,12 @@ parallel_test_group_list = [
                 "Func" :       default_autotest,
                 "Report" :     None,
                },
+                {
+                "Name" :"Sched autotest",
+                "Command" : "sched_autotest",
+                "Func" :default_autotest,
+                "Report" :None,
+                },
        ]
 },
 ]
index 2438433..c7ac1e4 100755 (executable)
@@ -167,6 +167,10 @@ static void cmd_autotest_parsed(void *parsed_result,
                ret |= test_memcpy_perf();
        if (all || !strcmp(res->autotest, "func_reentrancy_autotest"))
                ret |= test_func_reentrancy();
+       if (all || !strcmp(res->autotest, "red_autotest"))
+               ret |= test_red();
+       if (all || !strcmp(res->autotest, "sched_autotest"))
+               ret |= test_sched();
        if (all || !strcmp(res->autotest, "meter_autotest"))
                ret |= test_meter();
        if (all || !strcmp(res->autotest, "pm_autotest"))
@@ -203,7 +207,7 @@ cmdline_parse_token_string_t cmd_autotest_autotest =
                        "version_autotest#eal_fs_autotest#"
                        "cmdline_autotest#func_reentrancy_autotest#"
                        "mempool_perf_autotest#hash_perf_autotest#"
-                       "meter_autotest#"
+                       "red_autotest#meter_autotest#sched_autotest#"
                        "memcpy_perf_autotest#pm_autotest#"
                        "acl_autotest#power_autotest#"
                        "all_autotests");
index 75df8d0..6bac209 100755 (executable)
@@ -84,6 +84,8 @@ int test_version(void);
 int test_eal_fs(void);
 int test_cmdline(void);
 int test_func_reentrancy(void);
+int test_red(void);
+int test_sched(void);
 int test_meter(void);
 int test_pmac_pm(void);
 int test_pmac_acl(void);
diff --git a/app/test/test_red.c b/app/test/test_red.c
new file mode 100644 (file)
index 0000000..f083349
--- /dev/null
@@ -0,0 +1,1890 @@
+/*-
+ *   BSD LICENSE
+ * 
+ *   Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ * 
+ *   Redistribution and use in source and binary forms, with or without 
+ *   modification, are permitted provided that the following conditions 
+ *   are met:
+ * 
+ *     * Redistributions of source code must retain the above copyright 
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright 
+ *       notice, this list of conditions and the following disclaimer in 
+ *       the documentation and/or other materials provided with the 
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its 
+ *       contributors may be used to endorse or promote products derived 
+ *       from this software without specific prior written permission.
+ * 
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * 
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <sys/time.h>
+#include <time.h>
+#include <math.h>
+#include <cmdline_parse.h>
+
+#include "test.h"
+
+#ifdef RTE_LIBRTE_SCHED
+
+#include <rte_red.h>
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:2259)       /* conversion may lose significant bits */
+#pragma warning(disable:181)        /* Arg incompatible with format string */
+#endif
+
+#define DIM(x) (sizeof(x)/sizeof(x[0]))
+#define TEST_HZ_PER_KHZ 1000
+#define TEST_NSEC_MARGIN 500        /**< nanosecond margin when calculating clk freq */
+
+#define MAX_QEMPTY_TIME_MSEC   50000
+#define MSEC_PER_SEC           1000      /**< Milli-seconds per second */
+#define USEC_PER_MSEC          1000      /**< Micro-seconds per milli-second */
+#define USEC_PER_SEC           1000000   /**< Micro-seconds per second */
+
+/**< structures for testing rte_red performance and function */
+struct test_rte_red_config {        /**< Test structure for RTE_RED config */
+       struct rte_red_config *rconfig; /**< RTE_RED configuration parameters */
+       uint8_t num_cfg;                /**< Number of RTE_RED configs to test */
+       uint8_t *wq_log2;               /**< Test wq_log2 value to use */
+       uint32_t min_th;                /**< Queue minimum threshold */
+       uint32_t max_th;                /**< Queue maximum threshold */
+       uint8_t *maxp_inv;              /**< Inverse mark probability */
+};
+
+struct test_queue {                 /**< Test structure for RTE_RED Queues */
+       struct rte_red *rdata;          /**< RTE_RED runtime data */
+       uint32_t num_queues;            /**< Number of RTE_RED queues to test */
+       uint32_t *qconfig;              /**< Configuration of RTE_RED queues for test */
+       uint32_t *q;                    /**< Queue size */
+       uint32_t q_ramp_up;             /**< Num of enqueues to ramp up the queue */
+       uint32_t avg_ramp_up;           /**< Average num of enqueues to ramp up the queue */ 
+       uint32_t avg_tolerance;         /**< Tolerance in queue average */
+       double drop_tolerance;          /**< Drop tolerance of packets not enqueued */
+};
+
+struct test_var {                   /**< Test variables used for testing RTE_RED */
+       uint32_t wait_usec;             /**< Micro second wait interval */
+       uint32_t num_iterations;        /**< Number of test iterations */
+       uint32_t num_ops;               /**< Number of test operations */
+       uint64_t clk_freq;              /**< CPU clock frequency */
+       uint32_t sleep_sec;             /**< Seconds to sleep */
+       uint32_t *dropped;              /**< Test operations dropped */
+       uint32_t *enqueued;             /**< Test operations enqueued */
+};
+
+struct test_config {                /**< Master test structure for RTE_RED */
+       const char *ifname;             /**< Interface name */
+       const char *msg;                /**< Test message for display */
+       const char *htxt;               /**< Header txt display for result output */
+       struct test_rte_red_config *tconfig; /**< Test structure for RTE_RED config */
+       struct test_queue *tqueue;      /**< Test structure for RTE_RED Queues */
+       struct test_var *tvar;          /**< Test variables used for testing RTE_RED */
+       uint32_t *tlevel;               /**< Queue levels */
+};
+
+enum test_result {
+       FAIL = 0,
+       PASS
+};
+
+/**< Test structure to define tests to run */
+struct tests {
+       struct test_config *testcfg;
+       enum test_result (*testfn)(struct test_config *);
+};
+
+struct rdtsc_prof {
+       uint64_t clk_start;
+       uint64_t clk_min;               /**< min clocks */
+       uint64_t clk_max;               /**< max clocks */
+       uint64_t clk_avgc;              /**< count to calc average */
+       double clk_avg;                 /**< cumulative sum to calc average */
+       const char *name;
+};
+
+static const uint64_t port_speed_bytes = (10ULL*1000ULL*1000ULL*1000ULL)/8ULL;
+static double inv_cycles_per_byte = 0;
+static double pkt_time_usec = 0;
+
+static void init_port_ts(uint64_t cpu_clock)
+{
+       double cycles_per_byte = (double)(cpu_clock) / (double)(port_speed_bytes);
+       inv_cycles_per_byte = 1.0 / cycles_per_byte;
+       pkt_time_usec = 1000000.0 / ((double)port_speed_bytes / (double)RTE_RED_S);
+}
+
+static uint64_t get_port_ts(void)
+{
+       return (uint64_t)((double)rte_rdtsc() * inv_cycles_per_byte);
+}
+
+static void rdtsc_prof_init(struct rdtsc_prof *p, const char *name)
+{
+       p->clk_min = (uint64_t)(-1LL);
+       p->clk_max = 0;
+       p->clk_avg = 0;
+       p->clk_avgc = 0;
+       p->name = name;
+}
+
+static inline void rdtsc_prof_start(struct rdtsc_prof *p)
+{
+       asm( "cpuid" : : : "%eax", "%ebx", "%ecx", "%edx" );
+       p->clk_start = rte_rdtsc();
+}
+
+static inline void rdtsc_prof_end(struct rdtsc_prof *p)
+{
+       uint64_t clk_start = rte_rdtsc() - p->clk_start;
+
+       p->clk_avgc++;
+       p->clk_avg += (double) clk_start;
+
+       if (clk_start > p->clk_max)
+               p->clk_max = clk_start;
+       if (clk_start < p->clk_min)
+               p->clk_min = clk_start;
+}
+
+static void rdtsc_prof_print(struct rdtsc_prof *p)
+{
+       if (p->clk_avgc>0) {
+               printf("RDTSC stats for %s: n=%" PRIu64 ", min=%" PRIu64 ", max=%" PRIu64 ", avg=%.1f\n",
+                       p->name,
+                       p->clk_avgc,
+                       p->clk_min,
+                       p->clk_max,
+                       (p->clk_avg / ((double) p->clk_avgc)));
+       }
+}
+
+static uint32_t rte_red_get_avg_int(const struct rte_red_config *red_cfg,
+                                   struct rte_red *red)
+{
+       /**
+        * scale by 1/n and convert from fixed-point to integer
+        */
+       return red->avg >> (RTE_RED_SCALING + red_cfg->wq_log2);
+}
+
+static double rte_red_get_avg_float(const struct rte_red_config *red_cfg,
+                                   struct rte_red *red)
+{
+       /**
+        * scale by 1/n and convert from fixed-point to floating-point
+        */
+       return ldexp((double)red->avg,  -(RTE_RED_SCALING + red_cfg->wq_log2));
+}
+
+static void rte_red_set_avg_int(const struct rte_red_config *red_cfg,
+                               struct rte_red *red,
+                               uint32_t avg)
+{
+       /**
+        * scale by n and convert from integer to fixed-point
+        */
+       red->avg = avg << (RTE_RED_SCALING + red_cfg->wq_log2);
+}
+
+static double calc_exp_avg_on_empty(double avg, uint32_t n, uint32_t time_diff)
+{
+       return avg * pow((1.0 - 1.0 / (double)n), (double)time_diff / pkt_time_usec);
+}
+
+static double calc_drop_rate(uint32_t enqueued, uint32_t dropped)
+{
+       return (double)dropped / ((double)enqueued + (double)dropped);
+}
+
+/**
+ * calculate the drop probability
+ */
+static double calc_drop_prob(uint32_t min_th, uint32_t max_th,
+                            uint32_t maxp_inv, uint32_t avg)
+{
+       double drop_prob = 0.0;
+
+       if (avg < min_th) {
+               drop_prob = 0.0;
+       } else if (avg < max_th) {
+               drop_prob = (1.0 / (double)maxp_inv)
+                       * ((double)(avg - min_th)
+                          / (double)(max_th - min_th));
+       } else {
+               drop_prob = 1.0;
+       }
+       return (drop_prob);
+}
+
+/**
+ *  check if drop rate matches drop probability within tolerance
+ */
+static int check_drop_rate(double *diff, double drop_rate, double drop_prob, double tolerance)
+{
+       double abs_diff = 0.0;
+       int ret = 1;
+
+       abs_diff = fabs(drop_rate - drop_prob);
+       if ((int)abs_diff == 0) {
+               *diff = 0.0;
+       } else {
+               *diff = (abs_diff / drop_prob) * 100.0;
+               if (*diff > tolerance) {
+                       ret = 0;
+               }
+        }
+       return (ret);
+}
+
+/**
+ *  check if average queue size is within tolerance
+ */
+static int check_avg(double *diff, double avg, double exp_avg, double tolerance)
+{
+       double abs_diff = 0.0;
+       int ret = 1;
+
+       abs_diff = fabs(avg - exp_avg);
+       if ((int)abs_diff == 0) {
+               *diff = 0.0;
+       } else {
+               *diff = (abs_diff / exp_avg) * 100.0;
+               if (*diff > tolerance) {
+                       ret = 0;
+                }
+       }
+       return (ret);
+}
+
+/**
+ * get the clk frequency in Hz
+ */
+static uint64_t get_machclk_freq(void)
+{
+       uint64_t start = 0;
+       uint64_t end = 0;
+       uint64_t diff = 0;
+       uint64_t clk_freq_hz = 0;
+       struct timespec tv_start = {0, 0}, tv_end = {0, 0};
+       struct timespec req = {0, 0};
+
+       req.tv_sec = 1;
+       req.tv_nsec = 0;
+
+       clock_gettime(CLOCK_REALTIME, &tv_start);
+       start = rte_rdtsc();
+
+       if (nanosleep(&req, NULL) != 0) {
+               perror("get_machclk_freq()");
+               exit(EXIT_FAILURE);
+       }
+
+       clock_gettime(CLOCK_REALTIME, &tv_end);
+       end = rte_rdtsc();
+
+       diff = (uint64_t)(tv_end.tv_sec - tv_start.tv_sec) * USEC_PER_SEC
+               + ((tv_end.tv_nsec - tv_start.tv_nsec + TEST_NSEC_MARGIN) / 
+                  USEC_PER_MSEC); /**< diff is in micro secs */
+
+       if (diff == 0)
+               return(0);
+
+       clk_freq_hz = ((end - start) * USEC_PER_SEC / diff);
+       return (clk_freq_hz);
+}
+
+/**
+ * initialize the test rte_red config
+ */
+static enum test_result
+test_rte_red_init(struct test_config *tcfg)
+{
+       unsigned i = 0;
+
+       tcfg->tvar->clk_freq = get_machclk_freq();
+       init_port_ts( tcfg->tvar->clk_freq );
+
+       for (i = 0; i < tcfg->tconfig->num_cfg; i++) {
+               if (rte_red_config_init(&tcfg->tconfig->rconfig[i],
+                                       (uint16_t)tcfg->tconfig->wq_log2[i],
+                                       (uint16_t)tcfg->tconfig->min_th,
+                                       (uint16_t)tcfg->tconfig->max_th,
+                                       (uint16_t)tcfg->tconfig->maxp_inv[i]) != 0) {
+                       return(FAIL);
+               }
+       }
+
+       *tcfg->tqueue->q = 0;
+       *tcfg->tvar->dropped = 0;
+       *tcfg->tvar->enqueued = 0;
+       return(PASS);
+}
+
+/**
+ * enqueue until actual queue size reaches target level
+ */
+static int
+increase_actual_qsize(struct rte_red_config *red_cfg,
+                      struct rte_red *red,
+                      uint32_t *q,
+                      uint32_t level,
+                      uint32_t attempts)
+{
+        uint32_t i = 0;
+
+        for (i = 0; i < attempts; i++) {
+                int ret = 0;
+
+                /**
+                 * enqueue
+                 */
+                ret = rte_red_enqueue(red_cfg, red, *q, get_port_ts() );
+                if (ret == 0) {
+                        if (++(*q) >= level)
+                                break;
+                }
+        }
+        /**
+        * check if target actual queue size has been reached
+        */
+        if (*q != level)
+                return (-1);
+        /**
+         * success
+         */
+        return (0);
+}
+
+/**
+ * enqueue until average queue size reaches target level
+ */
+static int
+increase_average_qsize(struct rte_red_config *red_cfg,
+                       struct rte_red *red,
+                       uint32_t *q,
+                       uint32_t level,
+                       uint32_t num_ops)
+{
+        uint32_t avg = 0;
+        uint32_t i = 0;
+
+        for (i = 0; i < num_ops; i++) {
+                /**
+                 * enqueue
+                 */
+                rte_red_enqueue(red_cfg, red, *q, get_port_ts());
+        }
+        /**
+         * check if target average queue size has been reached
+         */
+        avg = rte_red_get_avg_int(red_cfg, red);
+        if (avg != level)
+                return (-1);
+        /**
+         * success
+         */
+        return (0);
+}
+
+/**
+ * setup default values for the functional test structures
+ */
+static struct rte_red_config ft_wrconfig[1];
+static struct rte_red ft_rtdata[1];
+static uint8_t ft_wq_log2[] = {9};
+static uint8_t ft_maxp_inv[] = {10}; 
+static uint32_t  ft_qconfig[] = {0, 0, 1, 1};
+static uint32_t  ft_q[] ={0};
+static uint32_t  ft_dropped[] ={0};
+static uint32_t  ft_enqueued[] ={0};
+
+static struct test_rte_red_config ft_tconfig =  {
+       .rconfig = ft_wrconfig,
+       .num_cfg = DIM(ft_wrconfig),
+       .wq_log2 = ft_wq_log2,
+       .min_th = 32,
+       .max_th = 128,
+       .maxp_inv = ft_maxp_inv,
+};
+
+static struct test_queue ft_tqueue = {
+       .rdata = ft_rtdata,
+       .num_queues = DIM(ft_rtdata),
+       .qconfig = ft_qconfig,
+       .q = ft_q,
+       .q_ramp_up = 1000000,
+       .avg_ramp_up = 1000000,
+       .avg_tolerance = 5,  /* 5 percent */
+       .drop_tolerance = 50,  /* 50 percent */
+};
+
+static struct test_var ft_tvar = {
+       .wait_usec = 250000,
+       .num_iterations = 20,
+       .num_ops = 10000,
+       .clk_freq = 0,
+       .dropped = ft_dropped,
+       .enqueued = ft_enqueued,
+       .sleep_sec = (MAX_QEMPTY_TIME_MSEC / MSEC_PER_SEC) + 2,
+};
+
+/**
+ * functional test enqueue/dequeue packets
+ */
+static void enqueue_dequeue_func(struct rte_red_config *red_cfg,
+                                 struct rte_red *red,
+                                 uint32_t *q,
+                                 uint32_t num_ops,
+                                 uint32_t *enqueued,
+                                 uint32_t *dropped)
+{
+        uint32_t i = 0;
+
+        for (i = 0; i < num_ops; i++) {
+                int ret = 0;
+
+                /**
+                 * enqueue
+                 */
+                ret = rte_red_enqueue(red_cfg, red, *q, get_port_ts());
+                if (ret == 0)
+                        (*enqueued)++;
+                else
+                        (*dropped)++;
+        }
+}
+
+/**
+ * Test F1: functional test 1
+ */
+static uint32_t ft1_tlevels[] =  {6, 12, 18, 24, 30, 36, 42, 48, 54, 60, 66, 72, 78, 84, 90, 96, 102, 108, 114, 120, 126, 132, 138, 144};
+
+static struct test_config func_test1_config = {
+       .ifname = "functional test 1 interface",
+       .msg = "functional test 1 : use one rte_red configuration,\n"
+       "                   increase average queue size to various levels,\n"
+       "                   compare drop rate to drop probability\n\n",
+       .htxt = "                "
+       "avg queue size "
+       "enqueued       "
+       "dropped        "
+       "drop prob %    "
+       "drop rate %    "
+       "diff %         "
+       "tolerance %    "
+       "\n",
+       .tconfig = &ft_tconfig,
+       .tqueue = &ft_tqueue,
+       .tvar = &ft_tvar,
+       .tlevel = ft1_tlevels,
+};
+
+static enum test_result func_test1(struct test_config *tcfg)
+{
+       enum test_result result = PASS;
+       uint32_t i = 0;
+
+       printf("%s", tcfg->msg);
+
+       if (test_rte_red_init(tcfg) != PASS) {
+               result = FAIL;
+               goto out;
+       }
+
+       printf("%s", tcfg->htxt); 
+
+       for (i = 0; i < DIM(ft1_tlevels); i++) {
+               const char *label = NULL;
+               uint32_t avg = 0;
+               double drop_rate = 0.0;
+               double drop_prob = 0.0;
+               double diff = 0.0;
+
+               /**
+                * reset rte_red run-time data
+                */
+               rte_red_rt_data_init(tcfg->tqueue->rdata);
+               *tcfg->tvar->enqueued = 0;
+               *tcfg->tvar->dropped = 0;
+
+               if (increase_actual_qsize(tcfg->tconfig->rconfig,
+                                         tcfg->tqueue->rdata,
+                                         tcfg->tqueue->q,
+                                         tcfg->tlevel[i],
+                                         tcfg->tqueue->q_ramp_up) != 0) {
+                       result = FAIL;
+                       goto out;
+               }
+
+               if (increase_average_qsize(tcfg->tconfig->rconfig,
+                                          tcfg->tqueue->rdata,
+                                          tcfg->tqueue->q,
+                                          tcfg->tlevel[i],
+                                          tcfg->tqueue->avg_ramp_up) != 0)  {
+                       result = FAIL;
+                       goto out;
+               }
+
+               enqueue_dequeue_func(tcfg->tconfig->rconfig,
+                                    tcfg->tqueue->rdata,
+                                    tcfg->tqueue->q,
+                                    tcfg->tvar->num_ops,
+                                    tcfg->tvar->enqueued,
+                                    tcfg->tvar->dropped);
+
+               avg = rte_red_get_avg_int(tcfg->tconfig->rconfig, tcfg->tqueue->rdata);
+               if (avg != tcfg->tlevel[i]) {
+                        fprintf(stderr, "Fail: avg != level\n");
+                       result = FAIL;
+                }
+
+               drop_rate = calc_drop_rate(*tcfg->tvar->enqueued, *tcfg->tvar->dropped);
+               drop_prob = calc_drop_prob(tcfg->tconfig->min_th, tcfg->tconfig->max_th,
+                                          *tcfg->tconfig->maxp_inv, tcfg->tlevel[i]);
+               if (!check_drop_rate(&diff, drop_rate, drop_prob, (double)tcfg->tqueue->drop_tolerance))
+                       result = FAIL;
+
+               if (tcfg->tlevel[i] == tcfg->tconfig->min_th)
+                       label = "min thresh:     ";
+               else if (tcfg->tlevel[i] == tcfg->tconfig->max_th)
+                       label = "max thresh:     ";
+               else
+                       label = "                ";
+               printf("%s%-15u%-15u%-15u%-15.4lf%-15.4lf%-15.4lf%-15.4lf\n",
+                      label, avg, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+                      drop_prob * 100.0, drop_rate * 100.0, diff,
+                      (double)tcfg->tqueue->drop_tolerance);
+       }
+out:
+       return (result);
+}
+
+/**
+ * Test F2: functional test 2
+ */
+static uint32_t ft2_tlevel[] = {127};
+static uint8_t ft2_wq_log2[] = {9, 9, 9, 9, 9, 9, 9, 9, 9, 9};
+static uint8_t ft2_maxp_inv[] = {10, 20, 30, 40, 50, 60, 70, 80, 90, 100};
+static struct rte_red_config ft2_rconfig[10];
+
+static struct test_rte_red_config ft2_tconfig =  {
+       .rconfig = ft2_rconfig,
+       .num_cfg = DIM(ft2_rconfig),
+       .wq_log2 = ft2_wq_log2,
+       .min_th = 32,
+       .max_th = 128,
+       .maxp_inv = ft2_maxp_inv,
+};
+
+static struct test_config func_test2_config = {
+       .ifname = "functional test 2 interface",
+       .msg = "functional test 2 : use several RED configurations,\n"
+       "                   increase average queue size to just below maximum threshold,\n"
+       "                   compare drop rate to drop probability\n\n",
+       .htxt = "RED config     "
+       "avg queue size "
+       "min threshold  "
+       "max threshold  "
+       "drop prob %    "
+       "drop rate %    "
+       "diff %         "
+       "tolerance %    "
+       "\n",
+       .tconfig = &ft2_tconfig,
+       .tqueue = &ft_tqueue,
+       .tvar = &ft_tvar,
+       .tlevel = ft2_tlevel,
+};
+
+static enum test_result func_test2(struct test_config *tcfg)
+{
+       enum test_result result = PASS;
+        double prev_drop_rate = 1.0;
+       uint32_t i = 0;
+
+       printf("%s", tcfg->msg);
+
+       if (test_rte_red_init(tcfg) != PASS) {
+               result = FAIL;
+               goto out;
+       }
+       rte_red_rt_data_init(tcfg->tqueue->rdata);
+
+       if (increase_actual_qsize(tcfg->tconfig->rconfig,
+                                 tcfg->tqueue->rdata,
+                                 tcfg->tqueue->q,
+                                 *tcfg->tlevel,
+                                 tcfg->tqueue->q_ramp_up) != 0) {
+               result = FAIL;
+               goto out;
+       }
+
+       if (increase_average_qsize(tcfg->tconfig->rconfig,
+                                  tcfg->tqueue->rdata,
+                                  tcfg->tqueue->q,
+                                  *tcfg->tlevel,
+                                  tcfg->tqueue->avg_ramp_up) != 0) {
+               result = FAIL;
+               goto out;
+       }
+       printf("%s", tcfg->htxt);
+
+       for (i = 0; i < tcfg->tconfig->num_cfg; i++) {
+               uint32_t avg = 0;
+               double drop_rate = 0.0;
+               double drop_prob = 0.0;
+               double diff = 0.0;
+
+               *tcfg->tvar->dropped = 0;
+               *tcfg->tvar->enqueued = 0;
+
+               enqueue_dequeue_func(&tcfg->tconfig->rconfig[i],
+                                    tcfg->tqueue->rdata,
+                                    tcfg->tqueue->q,
+                                    tcfg->tvar->num_ops,
+                                    tcfg->tvar->enqueued,
+                                    tcfg->tvar->dropped);
+
+               avg = rte_red_get_avg_int(&tcfg->tconfig->rconfig[i], tcfg->tqueue->rdata);
+               if (avg != *tcfg->tlevel)
+                       result = FAIL;
+
+               drop_rate = calc_drop_rate(*tcfg->tvar->enqueued, *tcfg->tvar->dropped);
+               drop_prob = calc_drop_prob(tcfg->tconfig->min_th, tcfg->tconfig->max_th,
+                                          tcfg->tconfig->maxp_inv[i], *tcfg->tlevel);
+               if (!check_drop_rate(&diff, drop_rate, drop_prob, (double)tcfg->tqueue->drop_tolerance))
+                       result = FAIL;
+               /**
+                * drop rate should decrease as maxp_inv increases
+                */
+               if (drop_rate > prev_drop_rate)
+                       result = FAIL;
+               prev_drop_rate = drop_rate;
+
+               printf("%-15u%-15u%-15u%-15u%-15.4lf%-15.4lf%-15.4lf%-15.4lf\n",
+                      i, avg, tcfg->tconfig->min_th, tcfg->tconfig->max_th,
+                      drop_prob * 100.0, drop_rate * 100.0, diff,
+                      (double)tcfg->tqueue->drop_tolerance);
+       }
+out:
+       return (result);
+}
+
+/**
+ * Test F3: functional test 3
+ */
+static uint32_t ft3_tlevel[] = {1022};
+
+static struct test_rte_red_config ft3_tconfig =  {
+       .rconfig = ft_wrconfig,
+       .num_cfg = DIM(ft_wrconfig),
+       .wq_log2 = ft_wq_log2,
+       .min_th = 32,
+       .max_th = 1023,
+       .maxp_inv = ft_maxp_inv,
+};
+
+static struct test_config func_test3_config = {
+       .ifname = "functional test 3 interface",
+       .msg = "functional test 3 : use one RED configuration,\n"
+       "                   increase average queue size to target level,\n"
+       "                   dequeue all packets until queue is empty,\n"
+       "                   confirm that average queue size is computed correctly while queue is empty\n\n",
+       .htxt = "q avg before   "
+       "q avg after    "
+       "expected       "
+       "difference %   "
+       "tolerance %    "
+       "result  "
+       "\n",
+       .tconfig = &ft3_tconfig,
+       .tqueue = &ft_tqueue,
+       .tvar = &ft_tvar,
+       .tlevel = ft3_tlevel,
+};
+
+/**
+ * Functional test 3: ramp one queue up to the target average queue size,
+ * then repeatedly empty it, wait, and enqueue a single packet; after each
+ * idle period verify that the EWMA average has decayed to the value
+ * predicted by calc_exp_avg_on_empty().
+ */
+static enum test_result func_test3(struct test_config *tcfg)
+{
+       enum test_result result = PASS;
+       uint32_t i = 0;
+
+       printf("%s", tcfg->msg);
+
+       if (test_rte_red_init(tcfg) != PASS) {
+               result = FAIL;
+               goto out;
+       }
+
+       rte_red_rt_data_init(tcfg->tqueue->rdata);
+
+       if (increase_actual_qsize(tcfg->tconfig->rconfig,
+                                 tcfg->tqueue->rdata,
+                                 tcfg->tqueue->q,
+                                 *tcfg->tlevel,
+                                 tcfg->tqueue->q_ramp_up) != 0) {
+               result = FAIL;
+               goto out;
+       }
+
+       if (increase_average_qsize(tcfg->tconfig->rconfig,
+                                  tcfg->tqueue->rdata,
+                                  tcfg->tqueue->q,
+                                  *tcfg->tlevel,
+                                  tcfg->tqueue->avg_ramp_up) != 0) {
+               result = FAIL;
+               goto out;
+       }
+
+       printf("%s", tcfg->htxt);
+
+       for (i = 0; i < tcfg->tvar->num_iterations; i++) {
+               double avg_before = 0;
+               double avg_after = 0;
+                double exp_avg = 0;
+               double diff = 0.0;
+
+               avg_before = rte_red_get_avg_float(tcfg->tconfig->rconfig, tcfg->tqueue->rdata);
+
+               /**
+               * empty the queue
+               */
+               *tcfg->tqueue->q = 0;
+               rte_red_mark_queue_empty(tcfg->tqueue->rdata, get_port_ts());
+
+               rte_delay_us(tcfg->tvar->wait_usec);
+
+               /**
+                * enqueue one packet to recalculate average queue size
+                */
+               if (rte_red_enqueue(tcfg->tconfig->rconfig,
+                                   tcfg->tqueue->rdata,
+                                   *tcfg->tqueue->q,
+                                   get_port_ts()) == 0) {
+                       (*tcfg->tqueue->q)++;
+               } else {
+                       /* a packet arriving on an empty queue should never be dropped */
+                       printf("%s:%d: packet enqueued on empty queue was dropped\n", __func__, __LINE__);
+                       result = FAIL;
+               }
+
+               /* compare measured average against the analytic decay model */
+               exp_avg = calc_exp_avg_on_empty(avg_before, 
+                                             (1 << *tcfg->tconfig->wq_log2),
+                                             tcfg->tvar->wait_usec);
+               avg_after = rte_red_get_avg_float(tcfg->tconfig->rconfig, 
+                                                 tcfg->tqueue->rdata);
+               if (!check_avg(&diff, avg_after, exp_avg, (double)tcfg->tqueue->avg_tolerance))
+                       result = FAIL;
+
+               printf("%-15.4lf%-15.4lf%-15.4lf%-15.4lf%-15.4lf%-15s\n",
+                      avg_before, avg_after, exp_avg, diff,
+                      (double)tcfg->tqueue->avg_tolerance,
+                      diff <= (double)tcfg->tqueue->avg_tolerance ? "pass" : "fail");
+       }
+out:
+       return (result);
+}
+
+/**
+ * Test F4: functional test 4
+ * Static configuration: one RED config, queue held just below max
+ * threshold (1022), then left empty for > 50 s to verify the average
+ * decays all the way to zero (avg_tolerance is 0).
+ */
+static uint32_t ft4_tlevel[] = {1022};
+static uint8_t ft4_wq_log2[] = {11};
+
+static struct test_rte_red_config ft4_tconfig =  {
+       .rconfig = ft_wrconfig,
+       .num_cfg = DIM(ft_wrconfig),
+       .min_th = 32,
+       .max_th = 1023,
+       .wq_log2 = ft4_wq_log2,
+       .maxp_inv = ft_maxp_inv,
+};
+
+static struct test_queue ft4_tqueue = {
+       .rdata = ft_rtdata,
+       .num_queues = DIM(ft_rtdata),
+       .qconfig = ft_qconfig,
+       .q = ft_q,
+       .q_ramp_up = 1000000,
+       .avg_ramp_up = 1000000,
+       .avg_tolerance = 0,  /* 0 percent */
+       .drop_tolerance = 50,  /* 50 percent */
+};
+
+static struct test_config func_test4_config = {
+       .ifname = "functional test 4 interface",
+       .msg = "functional test 4 : use one RED configuration,\n"
+       "                   increase average queue size to target level,\n"
+       "                   dequeue all packets until queue is empty,\n"
+       "                   confirm that average queue size is computed correctly while\n"
+       "                   queue is empty for more than 50 sec,\n"
+       "                   (this test takes 52 sec to run)\n\n",
+       .htxt = "q avg before   "
+       "q avg after    "
+       "expected       "
+       "difference %   "
+       "tolerance %    "
+       "result  "
+       "\n",
+       .tconfig = &ft4_tconfig,
+       .tqueue = &ft4_tqueue,
+       .tvar = &ft_tvar,
+       .tlevel = ft4_tlevel,
+};
+
+/**
+ * Functional test 4: ramp one queue to the target average, then leave it
+ * empty for sleep_sec (> 50 s) and verify the average queue size has
+ * decayed to exactly zero when the next packet is enqueued.
+ */
+static enum test_result func_test4(struct test_config *tcfg)
+{
+       enum test_result result = PASS;
+       uint64_t time_diff = 0;
+       uint64_t start = 0;
+       double avg_before = 0.0;
+       double avg_after = 0.0;
+        double exp_avg = 0.0;
+        double diff = 0.0;
+
+       printf("%s", tcfg->msg);
+
+       if (test_rte_red_init(tcfg) != PASS) {
+               result = FAIL;
+               goto out;
+       }
+
+       rte_red_rt_data_init(tcfg->tqueue->rdata);
+
+       if (increase_actual_qsize(tcfg->tconfig->rconfig,
+                                 tcfg->tqueue->rdata,
+                                 tcfg->tqueue->q,
+                                 *tcfg->tlevel,
+                                 tcfg->tqueue->q_ramp_up) != 0) {
+               result = FAIL;
+               goto out;
+       }
+
+       if (increase_average_qsize(tcfg->tconfig->rconfig,
+                                  tcfg->tqueue->rdata,
+                                  tcfg->tqueue->q,
+                                  *tcfg->tlevel,
+                                  tcfg->tqueue->avg_ramp_up) != 0) {
+               result = FAIL;
+               goto out;
+       }
+
+       printf("%s", tcfg->htxt);
+
+       avg_before = rte_red_get_avg_float(tcfg->tconfig->rconfig, tcfg->tqueue->rdata);
+
+       /**
+        * empty the queue
+        */
+       *tcfg->tqueue->q = 0;
+       rte_red_mark_queue_empty(tcfg->tqueue->rdata, get_port_ts());
+
+       /**
+        * record empty time locally 
+        */
+       start = rte_rdtsc();
+
+       sleep(tcfg->tvar->sleep_sec);
+
+       /**
+        * enqueue one packet to recalculate average queue size
+        */
+       if (rte_red_enqueue(tcfg->tconfig->rconfig,  
+                           tcfg->tqueue->rdata, 
+                           *tcfg->tqueue->q,
+                           get_port_ts()) != 0) {
+               result = FAIL;
+               goto out;
+       }
+       (*tcfg->tqueue->q)++;
+
+       /**
+        * calculate how long queue has been empty
+        */
+       time_diff = ((rte_rdtsc() - start) / tcfg->tvar->clk_freq)
+                 * MSEC_PER_SEC;
+       if (time_diff < MAX_QEMPTY_TIME_MSEC) {
+               /**
+                * this could happen if sleep was interrupted for some reason
+                */
+               result = FAIL;
+               goto out;
+       }
+
+       /**
+        * confirm that average queue size is now at expected level
+        */
+        exp_avg = 0.0;
+       avg_after = rte_red_get_avg_float(tcfg->tconfig->rconfig, tcfg->tqueue->rdata);
+       if (!check_avg(&diff, avg_after, exp_avg, (double)tcfg->tqueue->avg_tolerance))
+               result = FAIL;
+
+       printf("%-15.4lf%-15.4lf%-15.4lf%-15.4lf%-15.4lf%-15s\n",
+              avg_before, avg_after, exp_avg,
+              diff, (double)tcfg->tqueue->avg_tolerance,
+              diff <= (double)tcfg->tqueue->avg_tolerance ? "pass" : "fail");
+out:
+       return (result);
+}
+
+/**
+ * Test F5: functional test 5
+ * Static configuration: four queues sharing two RED configurations
+ * (qconfig maps queue -> config); compares per-queue drop rate against
+ * the analytic drop probability (scaled-up version of test F2).
+ */
+static uint32_t ft5_tlevel[] = {127};
+static uint8_t ft5_wq_log2[] = {9, 8};
+static uint8_t ft5_maxp_inv[] = {10, 20};
+static struct rte_red_config ft5_config[2];
+static struct rte_red ft5_data[4];
+static uint32_t ft5_q[4];
+static uint32_t ft5_dropped[] = {0, 0, 0, 0};
+static uint32_t ft5_enqueued[] = {0, 0, 0, 0};
+
+static struct test_rte_red_config ft5_tconfig =  {
+       .rconfig = ft5_config,
+       .num_cfg = DIM(ft5_config),
+       .min_th = 32,
+       .max_th = 128,
+       .wq_log2 = ft5_wq_log2,
+       .maxp_inv = ft5_maxp_inv,
+};
+
+static struct test_queue ft5_tqueue = {
+       .rdata = ft5_data,
+       .num_queues = DIM(ft5_data),
+       .qconfig = ft_qconfig,
+       .q = ft5_q,
+       .q_ramp_up = 1000000,
+       .avg_ramp_up = 1000000,
+       .avg_tolerance = 5,  /* 5 percent */
+       .drop_tolerance = 50,  /* 50 percent */
+};
+
+struct test_var ft5_tvar = {
+       .wait_usec = 0,
+       .num_iterations = 15,
+       .num_ops = 10000,
+       .clk_freq = 0,
+       .dropped = ft5_dropped,
+       .enqueued = ft5_enqueued,
+       .sleep_sec = 0,
+};
+
+static struct test_config func_test5_config = {
+       .ifname = "functional test 5 interface",
+       .msg = "functional test 5 : use several queues (each with its own run-time data),\n"
+       "                   use several RED configurations (such that each configuration is shared by multiple queues),\n"
+       "                   increase average queue size to just below maximum threshold,\n"
+       "                   compare drop rate to drop probability,\n"
+       "                   (this is a larger scale version of functional test 2)\n\n",
+       .htxt = "queue          "
+       "config         "
+       "avg queue size "
+       "min threshold  "
+       "max threshold  "
+       "drop prob %    "
+       "drop rate %    "
+       "diff %         "
+       "tolerance %    "
+       "\n",
+       .tconfig = &ft5_tconfig,
+       .tqueue = &ft5_tqueue,
+       .tvar = &ft5_tvar,
+       .tlevel = ft5_tlevel,
+};
+
+/**
+ * Functional test 5: for each of several queues (sharing RED configs via
+ * qconfig), ramp the average to just below the max threshold, run
+ * enqueue/dequeue cycles, and check the measured drop rate against the
+ * analytic drop probability within drop_tolerance.
+ */
+static enum test_result func_test5(struct test_config *tcfg)
+{
+       enum test_result result = PASS;
+       uint32_t j = 0;
+
+       printf("%s", tcfg->msg);
+
+       if (test_rte_red_init(tcfg) != PASS) {
+               result = FAIL;
+               goto out;
+       }
+
+       printf("%s", tcfg->htxt);
+
+       /* ramp every queue up to the target actual and average size */
+       for (j = 0; j < tcfg->tqueue->num_queues; j++) {
+               rte_red_rt_data_init(&tcfg->tqueue->rdata[j]);
+               tcfg->tqueue->q[j] = 0;
+
+               if (increase_actual_qsize(&tcfg->tconfig->rconfig[tcfg->tqueue->qconfig[j]],
+                                         &tcfg->tqueue->rdata[j],
+                                         &tcfg->tqueue->q[j],
+                                         *tcfg->tlevel,
+                                         tcfg->tqueue->q_ramp_up) != 0) {
+                       result = FAIL;
+                       goto out;
+               }
+
+               if (increase_average_qsize(&tcfg->tconfig->rconfig[tcfg->tqueue->qconfig[j]],
+                                          &tcfg->tqueue->rdata[j],
+                                          &tcfg->tqueue->q[j],
+                                          *tcfg->tlevel,
+                                          tcfg->tqueue->avg_ramp_up) != 0) {
+                       result = FAIL;
+                       goto out;
+               }
+       }
+
+       /* measure drop rate per queue and compare to drop probability */
+       for (j = 0; j < tcfg->tqueue->num_queues; j++) {
+               uint32_t avg = 0;
+               double drop_rate = 0.0;
+               double drop_prob = 0.0;
+               double diff = 0.0;
+
+               tcfg->tvar->dropped[j] = 0;
+               tcfg->tvar->enqueued[j] = 0;
+
+               enqueue_dequeue_func(&tcfg->tconfig->rconfig[tcfg->tqueue->qconfig[j]],
+                                    &tcfg->tqueue->rdata[j],
+                                    &tcfg->tqueue->q[j],
+                                    tcfg->tvar->num_ops,
+                                    &tcfg->tvar->enqueued[j],
+                                    &tcfg->tvar->dropped[j]);
+
+               avg = rte_red_get_avg_int(&tcfg->tconfig->rconfig[tcfg->tqueue->qconfig[j]],
+                                         &tcfg->tqueue->rdata[j]);
+               if (avg != *tcfg->tlevel)
+                       result = FAIL;
+
+               drop_rate = calc_drop_rate(tcfg->tvar->enqueued[j],tcfg->tvar->dropped[j]);
+               drop_prob = calc_drop_prob(tcfg->tconfig->min_th, tcfg->tconfig->max_th,
+                                          tcfg->tconfig->maxp_inv[tcfg->tqueue->qconfig[j]], 
+                                          *tcfg->tlevel);
+               if (!check_drop_rate(&diff, drop_rate, drop_prob, (double)tcfg->tqueue->drop_tolerance))
+                       result = FAIL;
+
+               printf("%-15u%-15u%-15u%-15u%-15u%-15.4lf%-15.4lf%-15.4lf%-15.4lf\n",
+                      j, tcfg->tqueue->qconfig[j], avg,
+                      tcfg->tconfig->min_th, tcfg->tconfig->max_th,
+                      drop_prob * 100.0, drop_rate * 100.0,
+                      diff, (double)tcfg->tqueue->drop_tolerance);
+       }
+out:
+       return (result);
+}
+
+/**
+ * Test F6: functional test 6
+ * Static configuration: four queues sharing two RED configurations;
+ * checks the average-queue-size decay on empty queues (scaled-up
+ * version of test F3).
+ */
+static uint32_t ft6_tlevel[] = {1022};
+static uint8_t ft6_wq_log2[] = {9, 8};
+static uint8_t ft6_maxp_inv[] = {10, 20};
+static struct rte_red_config ft6_config[2];
+static struct rte_red ft6_data[4];
+static uint32_t ft6_q[4];
+
+static struct test_rte_red_config ft6_tconfig =  {
+       .rconfig = ft6_config,
+       .num_cfg = DIM(ft6_config),
+       .min_th = 32,
+       .max_th = 1023,
+       .wq_log2 = ft6_wq_log2,
+       .maxp_inv = ft6_maxp_inv,
+};
+
+static struct test_queue ft6_tqueue = {
+       .rdata = ft6_data,
+       .num_queues = DIM(ft6_data),
+       .qconfig = ft_qconfig,
+       .q = ft6_q,
+       .q_ramp_up = 1000000,
+       .avg_ramp_up = 1000000,
+       .avg_tolerance = 5,  /* 5 percent */
+       .drop_tolerance = 50,  /* 50 percent */
+};
+
+/**
+ * Test F6 top-level configuration. The msg string is printed verbatim to
+ * the user; "sharte_red" was a mechanical rename artifact (sed of "red"
+ * to "rte_red" inside the word "shared") and is corrected here.
+ */
+static struct test_config func_test6_config = {
+       .ifname = "functional test 6 interface",
+       .msg = "functional test 6 : use several queues (each with its own run-time data),\n"
+       "                   use several RED configurations (such that each configuration is shared by multiple queues),\n"
+       "                   increase average queue size to target level,\n"
+       "                   dequeue all packets until queue is empty,\n"
+       "                   confirm that average queue size is computed correctly while queue is empty\n"
+       "                   (this is a larger scale version of functional test 3)\n\n",
+       .htxt = "queue          "
+       "config         "
+       "q avg before   "
+       "q avg after    "
+       "expected       "
+       "difference %   "
+       "tolerance %    "
+       "result  ""\n",
+       .tconfig = &ft6_tconfig,
+       .tqueue = &ft6_tqueue,
+       .tvar = &ft_tvar,
+       .tlevel = ft6_tlevel,
+};
+
+/**
+ * Functional test 6: per-queue version of test 3 — ramp each queue to
+ * the target average, empty it, wait, enqueue one packet, and verify the
+ * EWMA average decayed as predicted by calc_exp_avg_on_empty().
+ */
+static enum test_result func_test6(struct test_config *tcfg)
+{
+       enum test_result result = PASS;
+       uint32_t j = 0;
+
+       printf("%s", tcfg->msg);
+       if (test_rte_red_init(tcfg) != PASS) {
+               result = FAIL;
+               goto out;
+       }
+       printf("%s", tcfg->htxt);
+
+       /* ramp every queue up to the target actual and average size */
+       for (j = 0; j < tcfg->tqueue->num_queues; j++) {
+               rte_red_rt_data_init(&tcfg->tqueue->rdata[j]);
+               tcfg->tqueue->q[j] = 0;
+
+               if (increase_actual_qsize(&tcfg->tconfig->rconfig[tcfg->tqueue->qconfig[j]],
+                                         &tcfg->tqueue->rdata[j],
+                                         &tcfg->tqueue->q[j],
+                                         *tcfg->tlevel,
+                                         tcfg->tqueue->q_ramp_up) != 0) {
+                       result = FAIL;
+                       goto out;
+               }
+               if (increase_average_qsize(&tcfg->tconfig->rconfig[tcfg->tqueue->qconfig[j]],
+                                          &tcfg->tqueue->rdata[j],
+                                          &tcfg->tqueue->q[j],
+                                          *tcfg->tlevel,
+                                          tcfg->tqueue->avg_ramp_up) != 0) {
+                       result = FAIL;
+                       goto out;
+               }
+       }
+       for (j = 0; j < tcfg->tqueue->num_queues; j++) {
+               double avg_before = 0;
+               double avg_after = 0;
+               double exp_avg = 0;
+               double diff = 0.0;
+
+               avg_before = rte_red_get_avg_float(&tcfg->tconfig->rconfig[tcfg->tqueue->qconfig[j]], 
+                                                  &tcfg->tqueue->rdata[j]);
+
+               /**
+                * empty the queue
+                */
+               tcfg->tqueue->q[j] = 0;
+               rte_red_mark_queue_empty(&tcfg->tqueue->rdata[j], get_port_ts());
+               rte_delay_us(tcfg->tvar->wait_usec);
+
+               /**
+                * enqueue one packet to recalculate average queue size
+                */
+               if (rte_red_enqueue(&tcfg->tconfig->rconfig[tcfg->tqueue->qconfig[j]], 
+                                   &tcfg->tqueue->rdata[j],
+                                   tcfg->tqueue->q[j],
+                                   get_port_ts()) == 0) {
+                       tcfg->tqueue->q[j]++;
+               } else {
+                       /* a packet arriving on an empty queue should never be dropped */
+                       printf("%s:%d: packet enqueued on empty queue was dropped\n", __func__, __LINE__);
+                       result = FAIL;
+               }
+
+               exp_avg = calc_exp_avg_on_empty(avg_before, 
+                               (1 << tcfg->tconfig->wq_log2[tcfg->tqueue->qconfig[j]]),
+                               tcfg->tvar->wait_usec);
+               avg_after = rte_red_get_avg_float(&tcfg->tconfig->rconfig[tcfg->tqueue->qconfig[j]],
+                                               &tcfg->tqueue->rdata[j]);
+               if (!check_avg(&diff, avg_after, exp_avg, (double)tcfg->tqueue->avg_tolerance))
+                       result = FAIL;
+
+               printf("%-15u%-15u%-15.4lf%-15.4lf%-15.4lf%-15.4lf%-15.4lf%-15s\n",
+                      j, tcfg->tqueue->qconfig[j], avg_before, avg_after,
+                      exp_avg, diff, (double)tcfg->tqueue->avg_tolerance,
+                      diff <= tcfg->tqueue->avg_tolerance ? "pass" : "fail");
+       }
+out:
+       return (result);
+}
+
+/**
+ * setup default values for the performance test structures
+ * (single queue, single RED configuration, shared by tests P1-P6)
+ */
+static struct rte_red_config pt_wrconfig[1];
+static struct rte_red pt_rtdata[1];
+static uint8_t pt_wq_log2[] = {9};
+static uint8_t pt_maxp_inv[] = {10}; 
+static uint32_t pt_qconfig[] = {0};
+static uint32_t pt_q[] = {0};
+static uint32_t pt_dropped[] = {0};
+static uint32_t pt_enqueued[] = {0};
+
+static struct test_rte_red_config pt_tconfig =  {
+       .rconfig = pt_wrconfig,
+       .num_cfg = DIM(pt_wrconfig),
+       .wq_log2 = pt_wq_log2,
+       .min_th = 32,
+       .max_th = 128,
+       .maxp_inv = pt_maxp_inv,
+};
+
+static struct test_queue pt_tqueue = {
+       .rdata = pt_rtdata,
+       .num_queues = DIM(pt_rtdata),
+       .qconfig = pt_qconfig,
+       .q = pt_q,
+       .q_ramp_up = 1000000,
+       .avg_ramp_up = 1000000,
+       .avg_tolerance = 5,  /* 5 percent */
+       .drop_tolerance = 50,  /* 50 percent */
+};
+
+/**
+ * enqueue/dequeue packets
+ *
+ * Runs num_ops rte_red_enqueue() calls against one queue, timing each
+ * call with the rdtsc profiler and counting enqueue/drop outcomes.
+ * Note: the queue size *q is read but never incremented here, so the
+ * measured path sees a constant actual queue size.
+ */
+static void enqueue_dequeue_perf(struct rte_red_config *red_cfg,
+                                struct rte_red *red,
+                                uint32_t *q,
+                                uint32_t num_ops,
+                                uint32_t *enqueued,
+                                uint32_t *dropped,
+                                struct rdtsc_prof *prof)
+{
+       uint32_t i = 0;
+
+       for (i = 0; i < num_ops; i++) {
+               uint64_t ts = 0;
+               int ret = 0;
+               /**
+                * enqueue
+                */
+               ts = get_port_ts();
+               rdtsc_prof_start(prof);
+               ret = rte_red_enqueue(red_cfg, red, *q, ts );
+               rdtsc_prof_end(prof);
+               if (ret == 0)
+                       (*enqueued)++;
+               else
+                       (*dropped)++;
+       }
+}
+
+/**
+ * Setup test structures for tests P1, P2, P3 
+ * performance tests 1, 2 and 3
+ * (same queue/config, differing only in the target average level:
+ *  below min_th, between thresholds, and above max_th respectively)
+ */
+static uint32_t pt1_tlevel[] = {16};
+static uint32_t pt2_tlevel[] = {80};
+static uint32_t pt3_tlevel[] = {144};
+
+static struct test_var perf1_tvar = {
+       .wait_usec = 0,
+       .num_iterations = 15,
+       .num_ops = 50000000,
+       .clk_freq = 0,
+       .dropped = pt_dropped,
+       .enqueued = pt_enqueued,
+       .sleep_sec = 0
+};
+
+static struct test_config perf1_test1_config = {
+       .ifname = "performance test 1 interface",
+       .msg = "performance test 1 : use one RED configuration,\n"
+       "                    set actual and average queue sizes to level below min threshold,\n"
+       "                    measure enqueue performance\n\n",
+       .tconfig = &pt_tconfig,
+       .tqueue = &pt_tqueue,
+       .tvar = &perf1_tvar,
+       .tlevel = pt1_tlevel,
+};
+
+static struct test_config perf1_test2_config = {
+       .ifname = "performance test 2 interface",
+       .msg = "performance test 2 : use one RED configuration,\n"
+       "                    set actual and average queue sizes to level in between min and max thresholds,\n"
+       "                    measure enqueue performance\n\n",
+       .tconfig = &pt_tconfig,
+       .tqueue = &pt_tqueue,
+       .tvar = &perf1_tvar,
+       .tlevel = pt2_tlevel,
+};
+
+static struct test_config perf1_test3_config = {
+       .ifname = "performance test 3 interface",
+       .msg = "performance test 3 : use one RED configuration,\n"
+       "                    set actual and average queue sizes to level above max threshold,\n"
+       "                    measure enqueue performance\n\n",
+       .tconfig = &pt_tconfig,
+       .tqueue = &pt_tqueue,
+       .tvar = &perf1_tvar,
+       .tlevel = pt3_tlevel,
+};
+
+/**
+ * Performance test function to measure enqueue performance. 
+ * This runs performance tests 1, 2 and 3 
+ *
+ * Pins both the actual and the average queue size at *tlevel, then
+ * times num_ops enqueue operations and prints enqueue/drop percentages
+ * plus the rdtsc profile.
+ */
+static enum test_result perf1_test(struct test_config *tcfg)
+{
+       enum test_result result = PASS;
+       struct rdtsc_prof prof = {0, 0, 0, 0, 0.0, NULL};
+       uint32_t total = 0;
+
+       printf("%s", tcfg->msg);
+
+       rdtsc_prof_init(&prof, "enqueue");
+
+       if (test_rte_red_init(tcfg) != PASS) {
+               result = FAIL;
+               goto out;
+       }
+
+       /**
+        * set average queue size to target level
+        */
+       *tcfg->tqueue->q = *tcfg->tlevel;
+
+       /**
+        * initialize the rte_red run time data structure
+        */
+       rte_red_rt_data_init(tcfg->tqueue->rdata);
+
+       /**
+        *  set the queue average
+        */
+       rte_red_set_avg_int(tcfg->tconfig->rconfig, tcfg->tqueue->rdata, *tcfg->tlevel);
+       if (rte_red_get_avg_int(tcfg->tconfig->rconfig, tcfg->tqueue->rdata) 
+           != *tcfg->tlevel) {
+               result = FAIL;
+               goto out;
+       }
+
+       enqueue_dequeue_perf(tcfg->tconfig->rconfig,
+                            tcfg->tqueue->rdata,
+                            tcfg->tqueue->q,
+                            tcfg->tvar->num_ops,
+                            tcfg->tvar->enqueued,
+                            tcfg->tvar->dropped,
+                            &prof);
+
+       total = *tcfg->tvar->enqueued + *tcfg->tvar->dropped;
+
+       printf("\ntotal: %u, enqueued: %u (%.2lf%%), dropped: %u (%.2lf%%)\n", total,
+              *tcfg->tvar->enqueued, ((double)(*tcfg->tvar->enqueued) / (double)total) * 100.0,
+              *tcfg->tvar->dropped, ((double)(*tcfg->tvar->dropped) / (double)total) * 100.0);
+
+       rdtsc_prof_print(&prof);
+out:
+       return (result);
+}
+
+/**
+ * Setup test structures for tests P4, P5, P6 
+ * performance tests 4, 5 and 6
+ * (enqueue-on-empty-queue timing at three target average levels;
+ *  .clk_freq is intentionally left zero-initialized here)
+ */
+static uint32_t pt4_tlevel[] = {16};
+static uint32_t pt5_tlevel[] = {80};
+static uint32_t pt6_tlevel[] = {144};
+
+static struct test_var perf2_tvar = {
+       .wait_usec = 500,
+       .num_iterations = 10000,
+       .num_ops = 10000,
+       .dropped = pt_dropped,
+       .enqueued = pt_enqueued,
+       .sleep_sec = 0
+};
+
+static struct test_config perf2_test4_config = {
+       .ifname = "performance test 4 interface",
+       .msg = "performance test 4 : use one RED configuration,\n"
+       "                    set actual and average queue sizes to level below min threshold,\n"
+       "                    dequeue all packets until queue is empty,\n"
+       "                    measure enqueue performance when queue is empty\n\n",
+       .htxt = "iteration      "
+       "q avg before   "
+       "q avg after    "
+       "expected       "
+       "difference %   "
+       "tolerance %    "
+       "result  ""\n",
+       .tconfig = &pt_tconfig,
+       .tqueue = &pt_tqueue,
+       .tvar = &perf2_tvar,
+       .tlevel = pt4_tlevel,
+};
+
+static struct test_config perf2_test5_config = {
+       .ifname = "performance test 5 interface",
+       .msg = "performance test 5 : use one RED configuration,\n"
+       "                    set actual and average queue sizes to level in between min and max thresholds,\n"
+       "                    dequeue all packets until queue is empty,\n"
+       "                    measure enqueue performance when queue is empty\n\n",
+       .htxt = "iteration      "
+       "q avg before   "
+       "q avg after    "
+       "expected       "
+       "difference     "
+       "tolerance      "
+       "result  ""\n",
+       .tconfig = &pt_tconfig,
+       .tqueue = &pt_tqueue,
+       .tvar = &perf2_tvar,
+       .tlevel = pt5_tlevel,
+};
+
+static struct test_config perf2_test6_config = {
+       .ifname = "performance test 6 interface",
+       .msg = "performance test 6 : use one RED configuration,\n"
+       "                    set actual and average queue sizes to level above max threshold,\n"
+       "                    dequeue all packets until queue is empty,\n"
+       "                    measure enqueue performance when queue is empty\n\n",
+       .htxt = "iteration      "
+       "q avg before   "
+       "q avg after    "
+       "expected       "
+       "difference %   "
+       "tolerance %    "
+       "result  ""\n",
+       .tconfig = &pt_tconfig,
+       .tqueue = &pt_tqueue,
+       .tvar = &perf2_tvar,
+       .tlevel = pt6_tlevel,
+};
+
+/**
+ * Performance test function to measure enqueue performance when the 
+ * queue is empty. This runs performance tests 4, 5 and 6 
+ *
+ * Each iteration: pin the average at *tlevel, mark the queue empty,
+ * wait wait_usec, then time a single enqueue. On the first and last
+ * iterations the decayed average is also checked against
+ * calc_exp_avg_on_empty().
+ */
+static enum test_result perf2_test(struct test_config *tcfg)
+{
+       enum test_result result = PASS;
+       struct rdtsc_prof prof = {0, 0, 0, 0, 0.0, NULL};
+       uint32_t total = 0;
+       uint32_t i = 0;
+
+       printf("%s", tcfg->msg);
+
+       rdtsc_prof_init(&prof, "enqueue");
+
+       if (test_rte_red_init(tcfg) != PASS) {
+               result = FAIL;
+               goto out;
+       }
+
+       printf("%s", tcfg->htxt); 
+
+       for (i = 0; i < tcfg->tvar->num_iterations; i++) {
+               uint32_t count = 0;
+               uint64_t ts = 0;
+               double avg_before = 0;
+               int ret = 0;
+
+               /**
+                * set average queue size to target level
+                */
+               *tcfg->tqueue->q = *tcfg->tlevel;
+               count = (*tcfg->tqueue->rdata).count;
+
+               /**
+                * initialize the rte_red run time data structure
+                * (preserving the packet count accumulated so far)
+                */
+               rte_red_rt_data_init(tcfg->tqueue->rdata);
+               (*tcfg->tqueue->rdata).count = count;
+
+               /**
+                * set the queue average
+                */
+               rte_red_set_avg_int(tcfg->tconfig->rconfig, tcfg->tqueue->rdata, *tcfg->tlevel);
+               avg_before = rte_red_get_avg_float(tcfg->tconfig->rconfig, tcfg->tqueue->rdata);
+               /* the two compares together mean: avg_before != *tcfg->tlevel */
+               if ((avg_before < *tcfg->tlevel) || (avg_before > *tcfg->tlevel)) {
+                       result = FAIL;
+                       goto out;
+               }
+
+               /**
+                * empty the queue
+                */
+               *tcfg->tqueue->q = 0;
+               rte_red_mark_queue_empty(tcfg->tqueue->rdata, get_port_ts());
+
+               /**
+                * wait for specified period of time
+                */
+               rte_delay_us(tcfg->tvar->wait_usec);
+
+               /**
+                * measure performance of enqueue operation while queue is empty
+                */
+               ts = get_port_ts();
+               rdtsc_prof_start(&prof);
+               ret = rte_red_enqueue(tcfg->tconfig->rconfig, tcfg->tqueue->rdata, 
+                                     *tcfg->tqueue->q, ts );
+               rdtsc_prof_end(&prof);
+
+               /**
+                * gather enqueued/dropped statistics
+                */
+               if (ret == 0)
+                       (*tcfg->tvar->enqueued)++;
+               else
+                       (*tcfg->tvar->dropped)++;
+
+               /**
+                * on first and last iteration, confirm that
+                * average queue size was computed correctly
+                */
+               if ((i == 0) || (i == tcfg->tvar->num_iterations - 1)) {
+                       double avg_after = 0;
+                       double exp_avg = 0;
+                       double diff = 0.0;
+                       int ok = 0;
+
+                       avg_after = rte_red_get_avg_float(tcfg->tconfig->rconfig, tcfg->tqueue->rdata);
+                       exp_avg = calc_exp_avg_on_empty(avg_before, 
+                                                 (1 << *tcfg->tconfig->wq_log2),
+                                                 tcfg->tvar->wait_usec);
+                       if (check_avg(&diff, avg_after, exp_avg, (double)tcfg->tqueue->avg_tolerance))
+                               ok = 1;
+                       printf("%-15u%-15.4lf%-15.4lf%-15.4lf%-15.4lf%-15.4lf%-15s\n",
+                               i, avg_before, avg_after, exp_avg, diff,
+                               (double)tcfg->tqueue->avg_tolerance, ok ? "pass" : "fail");
+                       if (!ok) {
+                               result = FAIL;
+                               goto out;
+                       }
+               }
+       }
+       total =  *tcfg->tvar->enqueued +  *tcfg->tvar->dropped;
+       printf("\ntotal: %u, enqueued: %u (%.2lf%%), dropped: %u (%.2lf%%)\n", total,
+              *tcfg->tvar->enqueued, ((double)(*tcfg->tvar->enqueued) / (double)total) * 100.0,
+              *tcfg->tvar->dropped, ((double)(*tcfg->tvar->dropped) / (double)total) * 100.0);
+
+       rdtsc_prof_print(&prof);
+out:
+       return (result);
+}
+
+/**
+ * setup default values for overflow test structures
+ * (tracks the largest fixed-point average seen and how many bits it
+ * needs, to check avg_s cannot overflow with wq_log2 = 12)
+ */
+static uint32_t avg_max = 0;
+static uint32_t avg_max_bits = 0;
+
+static struct rte_red_config ovfl_wrconfig[1];
+static struct rte_red ovfl_rtdata[1];
+static uint8_t ovfl_maxp_inv[] = {10}; 
+static uint32_t ovfl_qconfig[] = {0, 0, 1, 1};
+static uint32_t ovfl_q[] ={0};
+static uint32_t ovfl_dropped[] ={0};
+static uint32_t ovfl_enqueued[] ={0};
+static uint32_t ovfl_tlevel[] = {1023};
+static uint8_t ovfl_wq_log2[] = {12};
+
+static struct test_rte_red_config ovfl_tconfig =  {
+       .rconfig = ovfl_wrconfig,
+       .num_cfg = DIM(ovfl_wrconfig),
+       .wq_log2 = ovfl_wq_log2,
+       .min_th = 32,
+       .max_th = 1023,
+       .maxp_inv = ovfl_maxp_inv,
+};
+
+static struct test_queue ovfl_tqueue = {
+       .rdata = ovfl_rtdata,
+       .num_queues = DIM(ovfl_rtdata),
+       .qconfig = ovfl_qconfig,
+       .q = ovfl_q,
+       .q_ramp_up = 1000000,
+       .avg_ramp_up = 1000000,
+       .avg_tolerance = 5,  /* 5 percent */
+       .drop_tolerance = 50,  /* 50 percent */
+};
+
+static struct test_var ovfl_tvar = {
+       .wait_usec = 10000,
+       .num_iterations = 1,
+       .num_ops = 10000,
+       .clk_freq = 0,
+       .dropped = ovfl_dropped,
+       .enqueued = ovfl_enqueued,
+       .sleep_sec = 0
+};
+
+/**
+ * Track the running maximum of the scaled average queue size and the
+ * number of bits (ceil(log2(avg_max))) needed to represent it, in the
+ * file-scope avg_max / avg_max_bits globals.
+ */
+static void ovfl_check_avg(uint32_t avg)
+{
+       if (avg > avg_max) {
+               double avg_log = 0;
+               uint32_t bits = 0;
+               avg_max = avg;
+               avg_log = log(((double)avg_max));
+               avg_log = avg_log / log(2.0);
+               bits = (uint32_t)ceil(avg_log);
+               if (bits > avg_max_bits)
+                       avg_max_bits = bits;
+       }
+}
+
+/**
+ * Overflow test 1 configuration. Two user-visible string fixes:
+ * "avergage" -> "average", and "requirte_red" -> "required" (the latter
+ * a mechanical-rename artifact from substituting "red" with "rte_red").
+ */
+static struct test_config ovfl_test1_config = {
+       .ifname = "queue average overflow test interface",
+       .msg = "overflow test 1 : use one RED configuration,\n"
+       "                 increase average queue size to target level,\n"
+       "                 check maximum number of bits required to represent avg_s\n\n",
+       .htxt = "avg queue size  "
+       "wq_log2  "
+       "fraction bits  "
+       "max queue avg  "
+       "num bits  "
+       "enqueued  "
+       "dropped   "
+       "drop prob %  "
+       "drop rate %  "
+       "\n",
+       .tconfig = &ovfl_tconfig,
+       .tqueue = &ovfl_tqueue,
+       .tvar = &ovfl_tvar,
+       .tlevel = ovfl_tlevel,
+};
+
+/**
+ * Overflow test 1: ramp the average queue size to the maximum target
+ * level while recording the largest scaled average (ovfl_check_avg),
+ * then verify the drop rate against the analytic drop probability and
+ * report how many bits the scaled average required.
+ */
+static enum test_result ovfl_test1(struct test_config *tcfg)
+{
+       enum test_result result = PASS;
+       uint32_t avg = 0;
+       uint32_t i = 0;
+       double drop_rate = 0.0;
+       double drop_prob = 0.0;
+       double diff = 0.0;
+       int ret = 0;
+
+       printf("%s", tcfg->msg);
+
+       if (test_rte_red_init(tcfg) != PASS) {
+
+               result = FAIL;
+               goto out;
+       }
+
+       /**
+        * reset rte_red run-time data
+        */
+       rte_red_rt_data_init(tcfg->tqueue->rdata);
+
+       /**
+        * increase actual queue size
+        */
+       for (i = 0; i < tcfg->tqueue->q_ramp_up; i++) {
+               ret = rte_red_enqueue(tcfg->tconfig->rconfig, tcfg->tqueue->rdata,
+                                     *tcfg->tqueue->q, get_port_ts());
+
+               if (ret == 0) {
+                       if (++(*tcfg->tqueue->q) >= *tcfg->tlevel)
+                               break;
+               }
+       }
+
+       /**
+        * enqueue, tracking the peak scaled average and counting
+        * enqueue/drop outcomes once the target average is reached
+        */
+       for (i = 0; i < tcfg->tqueue->avg_ramp_up; i++) {
+               ret = rte_red_enqueue(tcfg->tconfig->rconfig, tcfg->tqueue->rdata,
+                                     *tcfg->tqueue->q, get_port_ts());
+               ovfl_check_avg((*tcfg->tqueue->rdata).avg);
+               avg = rte_red_get_avg_int(tcfg->tconfig->rconfig, tcfg->tqueue->rdata);
+               if (avg == *tcfg->tlevel) {
+                       if (ret == 0)
+                               (*tcfg->tvar->enqueued)++;
+                       else
+                               (*tcfg->tvar->dropped)++;
+               }
+       }
+
+       /**
+        * check if target average queue size has been reached
+        */
+       avg = rte_red_get_avg_int(tcfg->tconfig->rconfig, tcfg->tqueue->rdata);
+       if (avg != *tcfg->tlevel) {
+               result = FAIL;
+               goto out;
+       }
+
+       /**
+        * check drop rate against drop probability
+        */
+       drop_rate = calc_drop_rate(*tcfg->tvar->enqueued, *tcfg->tvar->dropped);
+       drop_prob = calc_drop_prob(tcfg->tconfig->min_th,
+                                  tcfg->tconfig->max_th,
+                                  *tcfg->tconfig->maxp_inv,
+                                  *tcfg->tlevel);
+       if (!check_drop_rate(&diff, drop_rate, drop_prob, (double)tcfg->tqueue->drop_tolerance))
+               result = FAIL;
+
+       printf("%s", tcfg->htxt);
+       
+       printf("%-16u%-9u%-15u0x%08x     %-10u%-10u%-10u%-13.2lf%-13.2lf\n",
+              avg, *tcfg->tconfig->wq_log2, RTE_RED_SCALING,
+              avg_max, avg_max_bits,
+              *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+              drop_prob * 100.0, drop_rate * 100.0);
+out:
+       return (result);
+}
+
+/**
+ * define the functional and performance tests to be executed
+ */
+struct tests func_tests[] = { 
+       { &func_test1_config, func_test1 },
+       { &func_test2_config, func_test2 },             
+       { &func_test3_config, func_test3 },
+       { &func_test4_config, func_test4 },
+       { &func_test5_config, func_test5 },
+       { &func_test6_config, func_test6 },
+       { &ovfl_test1_config, ovfl_test1 }, 
+};
+
+struct tests perf_tests[] = { 
+       { &perf1_test1_config, perf1_test },
+       { &perf1_test2_config, perf1_test },
+       { &perf1_test3_config, perf1_test },
+       { &perf2_test4_config, perf2_test },
+       { &perf2_test5_config, perf2_test },
+       { &perf2_test6_config, perf2_test },
+};
+
+/**
+ * function to execute the required tests
+ */
+static void run_tests(struct tests *test_type, uint32_t test_count, uint32_t *num_tests, uint32_t *num_pass)
+{
+       enum test_result result = PASS;
+       uint32_t i = 0;
+
+       for (i = 0; i < test_count; i++) {
+               printf("\n--------------------------------------------------------------------------------\n");
+               result = test_type[i].testfn(test_type[i].testcfg);
+               (*num_tests)++;
+               if (result == PASS) {
+                       (*num_pass)++;
+                               printf("-------------------------------------<pass>-------------------------------------\n");
+               } else {
+                       printf("-------------------------------------<fail>-------------------------------------\n");
+               }
+       }
+       return;
+}
+
+/**
+ * check if functions accept invalid parameters
+ *
+ * First, all functions will be called without initialized RED
+ * Then, all of them will be called with NULL/invalid parameters
+ *
+ * Some functions are not tested as they are performance-critical and thus
+ * don't do any parameter checking.
+ */
+static int
+test_invalid_parameters(void)
+{
+       struct rte_red_config config;
+
+       if (rte_red_rt_data_init(NULL) == 0) {
+               printf("rte_red_rt_data_init should have failed!\n");
+               return -1;
+       }
+
+       if (rte_red_config_init(NULL, 0, 0, 0, 0) == 0) {
+               printf("rte_red_config_init should have failed!\n");
+               return -1;
+       }
+
+       if (rte_red_rt_data_init(NULL) == 0) {
+               printf("rte_red_rt_data_init should have failed!\n");
+               return -1;
+       }
+
+       /* NULL config */
+       if (rte_red_config_init(NULL, 0, 0, 0, 0) == 0) {
+               printf("%i: rte_red_config_init should have failed!\n", __LINE__);
+               return -1;
+       }
+       /* min_threshold == max_threshold */
+       if (rte_red_config_init(&config, 0, 1, 1, 0) == 0) {
+               printf("%i: rte_red_config_init should have failed!\n", __LINE__);
+               return -1;
+       }
+       /* min_threshold > max_threshold */
+       if (rte_red_config_init(&config, 0, 2, 1, 0) == 0) {
+               printf("%i: rte_red_config_init should have failed!\n", __LINE__);
+               return -1;
+       }
+       /* wq_log2 > RTE_RED_WQ_LOG2_MAX */
+       if (rte_red_config_init(&config,
+                       RTE_RED_WQ_LOG2_MAX + 1, 1, 2, 0) == 0) {
+               printf("%i: rte_red_config_init should have failed!\n", __LINE__);
+               return -1;
+       }
+       /* wq_log2 < RTE_RED_WQ_LOG2_MIN */
+       if (rte_red_config_init(&config,
+                       RTE_RED_WQ_LOG2_MIN - 1, 1, 2, 0) == 0) {
+               printf("%i: rte_red_config_init should have failed!\n", __LINE__);
+               return -1;
+       }
+       /* maxp_inv > RTE_RED_MAXP_INV_MAX */
+       if (rte_red_config_init(&config,
+                       RTE_RED_WQ_LOG2_MIN, 1, 2, RTE_RED_MAXP_INV_MAX + 1) == 0) {
+               printf("%i: rte_red_config_init should have failed!\n", __LINE__);
+               return -1;
+       }
+       /* maxp_inv < RTE_RED_MAXP_INV_MIN */
+       if (rte_red_config_init(&config,
+                       RTE_RED_WQ_LOG2_MIN, 1, 2, RTE_RED_MAXP_INV_MIN - 1) == 0) {
+               printf("%i: rte_red_config_init should have failed!\n", __LINE__);
+               return -1;
+       }
+
+       return 0;
+}
+
+int test_red(void)
+{
+       uint32_t num_tests = 0;
+       uint32_t num_pass = 0;
+       int ret = 0;
+
+       if (test_invalid_parameters() < 0)
+               return -1;
+
+       run_tests(func_tests, DIM(func_tests), &num_tests, &num_pass);
+       run_tests(perf_tests, DIM(perf_tests), &num_tests, &num_pass);
+
+       if (num_pass == num_tests) {
+               printf("[total: %u, pass: %u]\n", num_tests, num_pass);
+               ret = 0;
+       } else {
+               printf("[total: %u, pass: %u, fail: %u]\n", num_tests, num_pass, num_tests - num_pass);
+               ret = -1;
+       }
+       return (ret);
+}
+
+#else
+
+int
+test_red(void)
+{
+       printf("The SCHED library is not included in this build\n");
+       return 0;
+}
+
+#endif
diff --git a/app/test/test_sched.c b/app/test/test_sched.c
new file mode 100755 (executable)
index 0000000..a0efa52
--- /dev/null
@@ -0,0 +1,244 @@
+/*-
+ *   BSD LICENSE
+ * 
+ *   Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ * 
+ *   Redistribution and use in source and binary forms, with or without 
+ *   modification, are permitted provided that the following conditions 
+ *   are met:
+ * 
+ *     * Redistributions of source code must retain the above copyright 
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright 
+ *       notice, this list of conditions and the following disclaimer in 
+ *       the documentation and/or other materials provided with the 
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its 
+ *       contributors may be used to endorse or promote products derived 
+ *       from this software without specific prior written permission.
+ * 
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * 
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <unistd.h>
+
+#include <cmdline_parse.h>
+
+#include "test.h"
+
+#if defined(RTE_LIBRTE_SCHED) && defined(RTE_ARCH_X86_64)
+
+#include <rte_cycles.h>
+#include <rte_ether.h>
+#include <rte_ip.h>
+#include <rte_byteorder.h>
+#include <rte_sched.h>
+
+
+#define VERIFY(exp,fmt,args...)                                        \
+               if (!(exp)) {                                               \
+                       printf(fmt, ##args);                                    \
+                       return -1;                                              \
+               }
+
+
+#define SUBPORT        0
+#define PIPE           1
+#define TC                     2
+#define QUEUE          3
+
+static struct rte_sched_subport_params subport_param[] = {
+       {
+               .tb_rate = 1250000000,
+               .tb_size = 1000000,
+
+               .tc_rate = {1250000000, 1250000000, 1250000000, 1250000000},
+               .tc_period = 10,
+       },
+};
+
+static struct rte_sched_pipe_params pipe_profile[] = {
+       { /* Profile #0 */
+               .tb_rate = 305175,
+               .tb_size = 1000000,
+
+               .tc_rate = {305175, 305175, 305175, 305175},
+               .tc_period = 40,
+
+               .wrr_weights = {1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1},
+       },
+};
+
+static struct rte_sched_port_params port_param = {
+       .name = "port_0",
+       .socket = 0, /* computed */
+       .rate = 0, /* computed */
+       .frame_overhead = RTE_SCHED_FRAME_OVERHEAD_DEFAULT,
+       .n_subports_per_port = 1,
+       .n_pipes_per_subport = 4096,
+       .qsize = {64, 64, 64, 64},
+       .pipe_profiles = pipe_profile,
+       .n_pipe_profiles = 1,
+};
+
+#define NB_MBUF          32
+#define MAX_PACKET_SZ    2048
+#define MBUF_SZ (MAX_PACKET_SZ + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)
+#define PKT_BURST_SZ     32
+#define MEMPOOL_CACHE_SZ PKT_BURST_SZ
+#define SOCKET           0
+
+
+static struct rte_mempool *
+create_mempool(void)
+{
+       struct rte_mempool * mp;
+
+       mp = rte_mempool_lookup("test_sched");
+       if (!mp)
+               mp = rte_mempool_create("test_sched",
+                               NB_MBUF,
+                               MBUF_SZ,
+                               MEMPOOL_CACHE_SZ,
+                               sizeof(struct rte_pktmbuf_pool_private),
+                               rte_pktmbuf_pool_init,
+                               NULL,
+                               rte_pktmbuf_init,
+                               NULL,
+                               SOCKET,
+                               0);
+
+       return mp;
+}
+
+static void
+prepare_pkt(struct rte_mbuf *mbuf)
+{
+       struct ether_hdr *eth_hdr;
+       struct vlan_hdr *vlan1, *vlan2;
+       struct ipv4_hdr *ip_hdr;
+
+       /* Simulate a classifier */
+       eth_hdr = rte_pktmbuf_mtod(mbuf, struct ether_hdr *);
+       vlan1 = (struct vlan_hdr *)(&eth_hdr->ether_type );
+       vlan2 = (struct vlan_hdr *)((uintptr_t)&eth_hdr->ether_type + sizeof(struct vlan_hdr));
+       eth_hdr = (struct ether_hdr *)((uintptr_t)&eth_hdr->ether_type + 2 *sizeof(struct vlan_hdr));
+       ip_hdr = (struct ipv4_hdr *)((uintptr_t)eth_hdr +  sizeof(eth_hdr->ether_type));
+
+       vlan1->vlan_tci = rte_cpu_to_be_16(SUBPORT);
+       vlan2->vlan_tci = rte_cpu_to_be_16(PIPE);
+       eth_hdr->ether_type =  rte_cpu_to_be_16(ETHER_TYPE_IPv4);
+       ip_hdr->dst_addr = IPv4(0,0,TC,QUEUE);
+
+
+       rte_sched_port_pkt_write(mbuf, SUBPORT, PIPE, TC, QUEUE, e_RTE_METER_YELLOW);
+
+       /* 64 byte packet */
+       mbuf->pkt.pkt_len  = 60;
+       mbuf->pkt.data_len = 60;
+}
+
+
+/**
+ * main test entry point for the sched library
+ */
+int 
+test_sched(void)
+{
+       struct rte_mempool *mp = NULL;
+       struct rte_sched_port *port = NULL;
+       uint32_t pipe;
+       struct rte_mbuf *in_mbufs[10];
+       struct rte_mbuf *out_mbufs[10];
+       int i;
+
+       int err;
+
+       mp = create_mempool();
+
+       port_param.socket = 0;
+       port_param.rate = (uint64_t) 10000 * 1000 * 1000 / 8;
+       port_param.name = "port_0";
+
+       port = rte_sched_port_config(&port_param);
+       VERIFY(port != NULL, "Error config sched port\n");
+
+       
+       err = rte_sched_subport_config(port, SUBPORT, subport_param);
+       VERIFY(err == 0, "Error config sched, err=%d\n", err);
+
+       for (pipe = 0; pipe < port_param.n_pipes_per_subport; pipe ++) {
+               err = rte_sched_pipe_config(port, SUBPORT, pipe, 0);
+               VERIFY(err == 0, "Error config sched pipe %u, err=%d\n", pipe, err);
+       }
+
+       for (i = 0; i < 10; i++) {
+               in_mbufs[i] = rte_pktmbuf_alloc(mp);
+               prepare_pkt(in_mbufs[i]);
+       }
+
+
+       err = rte_sched_port_enqueue(port, in_mbufs, 10);
+       VERIFY(err == 10, "Wrong enqueue, err=%d\n", err);
+
+       err = rte_sched_port_dequeue(port, out_mbufs, 10);
+       VERIFY(err == 10, "Wrong dequeue, err=%d\n", err);
+
+       for (i = 0; i < 10; i++) {
+               enum rte_meter_color color;
+               uint32_t subport, traffic_class, queue;
+
+               color = rte_sched_port_pkt_read_color(out_mbufs[i]);
+               VERIFY(color == e_RTE_METER_YELLOW, "Wrong color\n");
+
+               rte_sched_port_pkt_read_tree_path(out_mbufs[i],
+                               &subport, &pipe, &traffic_class, &queue);
+
+               VERIFY(subport == SUBPORT, "Wrong subport\n");
+               VERIFY(pipe == PIPE, "Wrong pipe\n");
+               VERIFY(traffic_class == TC, "Wrong traffic_class\n");
+               VERIFY(queue == QUEUE, "Wrong queue\n");
+
+       }
+
+
+       struct rte_sched_subport_stats subport_stats;
+       uint32_t tc_ov;
+       rte_sched_subport_read_stats(port, SUBPORT, &subport_stats, &tc_ov);
+       //VERIFY(subport_stats.n_pkts_tc[TC-1] == 10, "Wrong subport stats\n");
+
+       struct rte_sched_queue_stats queue_stats;
+       uint16_t qlen;
+       rte_sched_queue_read_stats(port, QUEUE, &queue_stats, &qlen);
+       //VERIFY(queue_stats.n_pkts == 10, "Wrong queue stats\n");
+
+       rte_sched_port_free(port);
+
+       return 0;
+}
+
+#else /* RTE_LIBRTE_SCHED */
+
+int
+test_sched(void)
+{
+       printf("The Scheduler library is not included in this build\n");
+       return 0;
+}
+#endif /* RTE_LIBRTE_SCHED */
index 5960e85..a63d37a 100644 (file)
@@ -233,6 +233,16 @@ CONFIG_RTE_LIBRTE_NET=y
 #
 CONFIG_RTE_LIBRTE_METER=y
 
+#
+# Compile librte_sched
+#
+CONFIG_RTE_LIBRTE_SCHED=y
+CONFIG_RTE_SCHED_RED=n
+CONFIG_RTE_SCHED_COLLECT_STATS=n
+CONFIG_RTE_SCHED_SUBPORT_TC_OV=n
+CONFIG_RTE_SCHED_PORT_N_GRINDERS=8
+CONFIG_RTE_BITMAP_ARRAY1_SIZE=16
+
 #
 # Compile librte_kni
 #
index 6c6a59d..cf86ba5 100644 (file)
@@ -234,6 +234,16 @@ CONFIG_RTE_LIBRTE_NET=y
 #
 CONFIG_RTE_LIBRTE_METER=y
 
+#
+# Compile librte_sched
+#
+CONFIG_RTE_LIBRTE_SCHED=y
+CONFIG_RTE_SCHED_RED=n
+CONFIG_RTE_SCHED_COLLECT_STATS=n
+CONFIG_RTE_SCHED_SUBPORT_TC_OV=n
+CONFIG_RTE_SCHED_PORT_N_GRINDERS=8
+CONFIG_RTE_BITMAP_ARRAY1_SIZE=16
+
 #
 # Compile librte_kni
 #
index 1dcc8c6..b5d3362 100644 (file)
@@ -235,6 +235,17 @@ CONFIG_RTE_LIBRTE_NET=y
 CONFIG_RTE_LIBRTE_METER=y
 
 #
+# Compile librte_sched
+#
+CONFIG_RTE_LIBRTE_SCHED=y
+CONFIG_RTE_SCHED_RED=n
+CONFIG_RTE_SCHED_COLLECT_STATS=n
+CONFIG_RTE_SCHED_SUBPORT_TC_OV=n
+CONFIG_RTE_SCHED_PORT_N_GRINDERS=8
+CONFIG_RTE_BITMAP_ARRAY1_SIZE=16
+
+#
+# Compile the test application
 # Compile librte_kni
 #
 CONFIG_RTE_LIBRTE_KNI=y
index 7053a10..60f10af 100644 (file)
@@ -234,6 +234,16 @@ CONFIG_RTE_LIBRTE_NET=y
 #
 CONFIG_RTE_LIBRTE_METER=y
 
+#
+# Compile librte_sched
+#
+CONFIG_RTE_LIBRTE_SCHED=y
+CONFIG_RTE_SCHED_RED=n
+CONFIG_RTE_SCHED_COLLECT_STATS=n
+CONFIG_RTE_SCHED_SUBPORT_TC_OV=n
+CONFIG_RTE_SCHED_PORT_N_GRINDERS=8
+CONFIG_RTE_BITMAP_ARRAY1_SIZE=16
+
 #
 # Compile librte_kni
 #
diff --git a/examples/qos_sched/Makefile b/examples/qos_sched/Makefile
new file mode 100755 (executable)
index 0000000..08f4d19
--- /dev/null
@@ -0,0 +1,58 @@
+#   BSD LICENSE
+# 
+#   Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+#   All rights reserved.
+# 
+#   Redistribution and use in source and binary forms, with or without 
+#   modification, are permitted provided that the following conditions 
+#   are met:
+# 
+#     * Redistributions of source code must retain the above copyright 
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright 
+#       notice, this list of conditions and the following disclaimer in 
+#       the documentation and/or other materials provided with the 
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its 
+#       contributors may be used to endorse or promote products derived 
+#       from this software without specific prior written permission.
+# 
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# 
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-default-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+ifneq ($(CONFIG_RTE_EXEC_ENV),"linuxapp")
+$(error This application can only operate in a linuxapp environment, \
+please change the definition of the RTE_TARGET environment variable)
+endif
+
+# binary name
+APP = qos_sched
+
+# all source are stored in SRCS-y
+SRCS-y := main.c args.c init.c app_thread.c cfg_file.c
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+LDLIBS += -lrte_sched
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/qos_sched/app_thread.c b/examples/qos_sched/app_thread.c
new file mode 100755 (executable)
index 0000000..afce5ef
--- /dev/null
@@ -0,0 +1,302 @@
+/*-
+ *   BSD LICENSE
+ * 
+ *   Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ * 
+ *   Redistribution and use in source and binary forms, with or without 
+ *   modification, are permitted provided that the following conditions 
+ *   are met:
+ * 
+ *     * Redistributions of source code must retain the above copyright 
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright 
+ *       notice, this list of conditions and the following disclaimer in 
+ *       the documentation and/or other materials provided with the 
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its 
+ *       contributors may be used to endorse or promote products derived 
+ *       from this software without specific prior written permission.
+ * 
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * 
+ */
+
+#include <stdint.h>
+
+#include <rte_log.h>
+#include <rte_mbuf.h>
+#include <rte_malloc.h>
+#include <rte_cycles.h>
+#include <rte_ethdev.h>
+#include <rte_memcpy.h>
+#include <rte_byteorder.h>
+#include <rte_branch_prediction.h>
+#include <rte_sched.h>
+
+#include "main.h"
+
+/*
+ * QoS parameters are encoded as follows:
+ *             Outer VLAN ID defines subport
+ *             Inner VLAN ID defines pipe
+ *             Destination IP 0.0.XXX.0 defines traffic class
+ *             Destination IP host (0.0.0.XXX) defines queue
+ * Values below define offset to each field from start of frame
+ */
+#define SUBPORT_OFFSET 7
+#define PIPE_OFFSET            9
+#define TC_OFFSET              20
+#define QUEUE_OFFSET   20
+#define COLOR_OFFSET   19
+
+static inline int
+get_pkt_sched(struct rte_mbuf *m, uint32_t *subport, uint32_t *pipe,
+                       uint32_t *traffic_class, uint32_t *queue, uint32_t *color)
+{
+       uint16_t *pdata = rte_pktmbuf_mtod(m, uint16_t *);
+
+       *subport = (rte_be_to_cpu_16(pdata[SUBPORT_OFFSET]) & 0x0FFF) &
+                       (port_params.n_subports_per_port - 1); /* Outer VLAN ID*/
+       *pipe = (rte_be_to_cpu_16(pdata[PIPE_OFFSET]) & 0x0FFF) &
+                       (port_params.n_pipes_per_subport - 1); /* Inner VLAN ID */
+       *traffic_class = (pdata[QUEUE_OFFSET] & 0x0F) &
+                       (RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE - 1); /* Destination IP */
+       *queue = ((pdata[QUEUE_OFFSET] >> 8) & 0x0F) &
+                       (RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS - 1) ; /* Destination IP */
+       *color = pdata[COLOR_OFFSET] & 0x03;    /* Destination IP */
+
+       return 0;
+}
+
+void
+app_rx_thread(struct thread_conf **confs)
+{
+       uint32_t i, nb_rx;
+       struct rte_mbuf *rx_mbufs[burst_conf.rx_burst] __rte_cache_aligned;
+       struct thread_conf *conf;
+       int conf_idx = 0;
+
+       uint32_t subport;
+       uint32_t pipe;
+       uint32_t traffic_class;
+       uint32_t queue;
+       uint32_t color;
+
+       while ((conf = confs[conf_idx])) {
+               nb_rx = rte_eth_rx_burst(conf->rx_port, conf->rx_queue, rx_mbufs,
+                               burst_conf.rx_burst);
+
+               if (likely(nb_rx != 0)) {
+                       APP_STATS_ADD(conf->stat.nb_rx, nb_rx);
+
+                       for(i = 0; i < nb_rx; i++) {
+                               get_pkt_sched(rx_mbufs[i],
+                                               &subport, &pipe, &traffic_class, &queue, &color);
+                               rte_sched_port_pkt_write(rx_mbufs[i], subport, pipe,
+                                               traffic_class, queue, (enum rte_meter_color) color);
+                       }
+
+                       if (unlikely(rte_ring_sp_enqueue_bulk(conf->rx_ring,
+                                                               (void **)rx_mbufs, nb_rx) != 0)) {
+                               for(i = 0; i < nb_rx; i++) {
+                                       rte_pktmbuf_free(rx_mbufs[i]);
+
+                                       APP_STATS_ADD(conf->stat.nb_drop, 1);
+                               }
+                       }
+               }
+               conf_idx++;
+               if (confs[conf_idx] == NULL)
+                       conf_idx = 0;
+       }
+}
+
+
+
+/* Send the packet to an output interface
+ * For performance reasons the function returns the number of packets dropped, not sent,
+ * so 0 means that all packets were sent successfully
+ */
+
+static inline void
+app_send_burst(struct thread_conf *qconf)
+{
+       struct rte_mbuf **mbufs;
+       uint32_t n, ret;
+
+       mbufs = (struct rte_mbuf **)qconf->m_table;
+       n = qconf->n_mbufs;
+
+       do {
+               ret = rte_eth_tx_burst(qconf->tx_port, qconf->tx_queue, mbufs, (uint16_t)n);
+               if (unlikely(ret < n)) { /* we cannot drop the packets, so re-send */
+                       /* update number of packets to be sent */
+                       n -= ret;
+                       mbufs = (struct rte_mbuf **)&mbufs[ret];
+                       /* limit number of retries to avoid endless loop */
+                       /* reset retry counter if some packets were sent */
+                       if (likely(ret != 0)) {
+                               continue;
+                       }
+               }
+       } while (ret != n);
+}
+
+
+/* Send the packet to an output interface */
+static void
+app_send_packets(struct thread_conf *qconf, struct rte_mbuf **mbufs, uint32_t nb_pkt)
+{
+       uint32_t i, len;
+
+       len = qconf->n_mbufs;
+       for(i = 0; i < nb_pkt; i++) {
+               qconf->m_table[len] = mbufs[i];
+               len++;
+               /* enough pkts to be sent */
+               if (unlikely(len == burst_conf.tx_burst)) {
+                       qconf->n_mbufs = len;
+                       app_send_burst(qconf);
+                       len = 0;
+               }
+       }
+
+       qconf->n_mbufs = len;
+}
+
+void
+app_tx_thread(struct thread_conf **confs)
+{
+       struct rte_mbuf *mbufs[burst_conf.qos_dequeue];
+       struct thread_conf *conf;
+       int conf_idx = 0;
+       int retval;
+       const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US;
+
+       while ((conf = confs[conf_idx])) {
+               retval = rte_ring_sc_dequeue_bulk(conf->tx_ring, (void **)mbufs,
+                                       burst_conf.qos_dequeue);
+               if (likely(retval == 0)) {
+                       app_send_packets(conf, mbufs, burst_conf.qos_dequeue);
+
+                       conf->counter = 0; /* reset empty read loop counter */
+               }
+
+               conf->counter++;
+
+               /* drain ring and TX queues */
+               if (unlikely(conf->counter > drain_tsc)) {
+                       /* now check if there are any packets left to be transmitted */
+                       if (conf->n_mbufs != 0) {
+                               app_send_burst(conf);
+
+                               conf->n_mbufs = 0;
+                       }
+                       conf->counter = 0;
+               }
+
+               conf_idx++;
+               if (confs[conf_idx] == NULL)
+                       conf_idx = 0;
+       }
+}
+
+
+void
+app_worker_thread(struct thread_conf **confs)
+{
+       struct rte_mbuf *mbufs[burst_conf.ring_burst];
+       struct thread_conf *conf;
+       int conf_idx = 0;
+
+       while ((conf = confs[conf_idx])) {
+               uint32_t nb_pkt;
+               int retval;
+
+               /* Read packet from the ring */
+               retval = rte_ring_sc_dequeue_bulk(conf->rx_ring, (void **)mbufs,
+                                       burst_conf.ring_burst);
+               if (likely(retval == 0)) {
+                       int nb_sent = rte_sched_port_enqueue(conf->sched_port, mbufs,
+                                       burst_conf.ring_burst);
+
+                       APP_STATS_ADD(conf->stat.nb_drop, burst_conf.ring_burst - nb_sent);
+                       APP_STATS_ADD(conf->stat.nb_rx, burst_conf.ring_burst);
+               }
+
+               nb_pkt = rte_sched_port_dequeue(conf->sched_port, mbufs,
+                                       burst_conf.qos_dequeue);
+               if (likely(nb_pkt > 0))
+                       while (rte_ring_sp_enqueue_bulk(conf->tx_ring, (void **)mbufs, nb_pkt) != 0);
+
+               conf_idx++;
+               if (confs[conf_idx] == NULL)
+                       conf_idx = 0;
+       }
+}
+
+
+void
+app_mixed_thread(struct thread_conf **confs)
+{
+       struct rte_mbuf *mbufs[burst_conf.ring_burst];
+       struct thread_conf *conf;
+       int conf_idx = 0;
+       const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US;
+
+       while ((conf = confs[conf_idx])) {
+               uint32_t nb_pkt;
+               int retval;
+
+               /* Read packet from the ring */
+               retval = rte_ring_sc_dequeue_bulk(conf->rx_ring, (void **)mbufs,
+                                       burst_conf.ring_burst);
+               if (likely(retval == 0)) {
+                       int nb_sent = rte_sched_port_enqueue(conf->sched_port, mbufs,
+                                       burst_conf.ring_burst);
+
+                       APP_STATS_ADD(conf->stat.nb_drop, burst_conf.ring_burst - nb_sent);
+                       APP_STATS_ADD(conf->stat.nb_rx, burst_conf.ring_burst);
+               }
+
+
+               nb_pkt = rte_sched_port_dequeue(conf->sched_port, mbufs,
+                                       burst_conf.qos_dequeue);
+               if (likely(nb_pkt > 0)) {
+                       app_send_packets(conf, mbufs, nb_pkt);
+
+                       conf->counter = 0; /* reset empty read loop counter */
+               }
+
+               conf->counter++;
+
+               /* drain ring and TX queues */
+               if (unlikely(conf->counter > drain_tsc)) {
+
+                       /* now check if there are any packets left to be transmitted */
+                       if (conf->n_mbufs != 0) {
+                               app_send_burst(conf);
+
+                               conf->n_mbufs = 0;
+                       }
+                       conf->counter = 0;
+               }
+
+               conf_idx++;
+               if (confs[conf_idx] == NULL)
+                       conf_idx = 0;
+       }
+}
+
+
diff --git a/examples/qos_sched/args.c b/examples/qos_sched/args.c
new file mode 100755 (executable)
index 0000000..c9cfdb2
--- /dev/null
@@ -0,0 +1,467 @@
+/*-
+ *   BSD LICENSE
+ * 
+ *   Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ * 
+ *   Redistribution and use in source and binary forms, with or without 
+ *   modification, are permitted provided that the following conditions 
+ *   are met:
+ * 
+ *     * Redistributions of source code must retain the above copyright 
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright 
+ *       notice, this list of conditions and the following disclaimer in 
+ *       the documentation and/or other materials provided with the 
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its 
+ *       contributors may be used to endorse or promote products derived 
+ *       from this software without specific prior written permission.
+ * 
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * 
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <locale.h>
+#include <unistd.h>
+#include <limits.h>
+#include <getopt.h>
+
+#include <rte_log.h>
+#include <rte_eal.h>
+#include <rte_lcore.h>
+#include <rte_string_fns.h>
+
+#include "main.h"
+
+#define APP_NAME "qos_sched"
+/* upper bound on comma-separated values accepted by app_parse_opt_vals() */
+#define MAX_OPT_VALUES 8
+/* sysfs per-cpu topology directory, used to count cores present */
+#define SYS_CPU_DIR "/sys/devices/system/cpu/cpu%u/topology/"
+
+/* default master (EAL) lcore index; overridable with --mst */
+static uint32_t app_master_core = 1;
+/* bitmask of NUMA sockets used by the configured packet flows */
+static uint32_t app_numa_mask;
+/* bitmasks of lcores and ports claimed by --pfc options */
+static uint64_t app_used_core_mask = 0;
+static uint64_t app_used_port_mask = 0;
+static uint64_t app_used_rx_port_mask = 0;
+static uint64_t app_used_tx_port_mask = 0;
+
+
+/* Usage text template: the %s/%u placeholders are substituted by
+ * app_usage() with the program name and the current default values
+ * (argument order must match the printf() call there). */
+static const char usage[] =
+       "                                                                               \n"
+       "    %s <APP PARAMS>                                                            \n"
+       "                                                                               \n"
+       "Application mandatory parameters:                                              \n"
+       "    --pfc \"RX PORT, TX PORT, RX LCORE, WT LCORE\" : Packet flow configuration \n"
+       "           multiple pfc can be configured in command line                      \n"
+       "                                                                               \n"
+       "Application optional parameters:                                               \n"
+       "    --mst I : master core index (default value is %u)                          \n" 
+       "    --rsz \"A, B, C\" :   Ring sizes                                           \n"
+       "           A = Size (in number of buffer descriptors) of each of the NIC RX    \n"
+       "               rings read by the I/O RX lcores (default value is %u)           \n"
+       "           B = Size (in number of elements) of each of the SW rings used by the\n"
+       "               I/O RX lcores to send packets to worker lcores (default value is\n"
+       "               %u)                                                             \n"
+       "           C = Size (in number of buffer descriptors) of each of the NIC TX    \n"
+       "               rings written by worker lcores (default value is %u)            \n"
+       "    --bsz \"A, B, C, D\": Burst sizes                                          \n"
+       "           A = I/O RX lcore read burst size from NIC RX (default value is %u)  \n"
+       "           B = I/O RX lcore write burst size to output SW rings,               \n"
+       "               Worker lcore read burst size from input SW rings,               \n"
+       "               QoS enqueue size (default value is %u)                          \n"
+       "           C = QoS dequeue size (default value is %u)                          \n"
+       "           D = Worker lcore write burst size to NIC TX (default value is %u)   \n"
+       "    --rth \"A, B, C\" :   RX queue threshold parameters                        \n"
+       "           A = RX prefetch threshold (default value is %u)                     \n"
+       "           B = RX host threshold (default value is %u)                         \n"
+       "           C = RX write-back threshold (default value is %u)                   \n"
+       "    --tth \"A, B, C\" :   TX queue threshold parameters                        \n"
+       "           A = TX prefetch threshold (default value is %u)                     \n"
+       "           B = TX host threshold (default value is %u)                         \n"
+       "           C = TX write-back threshold (default value is %u)                   \n"
+       "    --cfg FILE : profile configuration to load                                 \n"
+;
+
+/* Display usage: print the usage template with the application's
+ * current default parameter values substituted in. */
+static void
+app_usage(const char *prgname)
+{
+       printf(usage, prgname, app_master_core,
+               APP_RX_DESC_DEFAULT, APP_RING_SIZE, APP_TX_DESC_DEFAULT,
+               MAX_PKT_RX_BURST, PKT_ENQUEUE, PKT_DEQUEUE, MAX_PKT_TX_BURST,
+               RX_PTHRESH, RX_HTHRESH, RX_WTHRESH,
+               TX_PTHRESH, TX_HTHRESH, TX_WTHRESH
+               );
+}
+
+/* Return 1 when the two strings are exactly equal, 0 otherwise. */
+static inline int str_is(const char *str, const char *is)
+{
+       if (strcmp(str, is) != 0)
+               return 0;
+       return 1;
+}
+
+/* Returns the mask of lcores owned by the EAL (all lcores with an RTE
+ * role, plus the master lcore).
+ * NOTE(review): assumes lcore indexes fit in a 64-bit mask
+ * (RTE_MAX_LCORE <= 64) -- confirm for this build configuration. */
+static uint64_t
+app_eal_core_mask(void)
+{
+       uint32_t i;
+       uint64_t cm = 0;
+       struct rte_config *cfg = rte_eal_get_configuration();
+
+       /* collect every lcore the EAL marked with an RTE role */
+       for (i = 0; i < RTE_MAX_LCORE; i++) {
+               if (cfg->lcore_role[i] == ROLE_RTE)
+                       cm |= (1ULL << i);
+       }
+
+       /* include the master lcore explicitly */
+       cm |= (1ULL << cfg->master_lcore);
+
+       return cm;
+}
+
+
+/* Count the CPU cores present in the system by probing each per-cpu
+ * topology directory in sysfs. */
+static uint32_t
+app_cpu_core_count(void)
+{
+       uint32_t count = 0;
+       int idx;
+
+       for (idx = 0; idx < RTE_MAX_LCORE; idx++) {
+               char sysfs_path[PATH_MAX];
+               int written;
+
+               /* skip indexes whose path cannot be formatted */
+               written = rte_snprintf(sysfs_path, sizeof(sysfs_path), SYS_CPU_DIR, idx);
+               if (written <= 0 || (unsigned)written >= sizeof(sysfs_path))
+                       continue;
+
+               if (access(sysfs_path, F_OK) == 0)
+                       count++;
+       }
+
+       return count;
+}
+
+/* Split a separator-delimited option string into up to n_vals unsigned
+ * numbers stored in opt_vals.
+ * Returns the number of values parsed, or -1 on invalid arguments or
+ * allocation failure. */
+static int
+app_parse_opt_vals(const char *conf_str, char separator, uint32_t n_vals, uint32_t *opt_vals)
+{
+       char *string;
+       uint32_t i, n_tokens;
+       char *tokens[MAX_OPT_VALUES];
+
+       if (conf_str == NULL || opt_vals == NULL || n_vals == 0 || n_vals > MAX_OPT_VALUES)
+               return -1;
+
+       /* duplicate configuration string before splitting it to tokens */
+       string = strdup(conf_str);
+       if (string == NULL)
+               return -1;
+
+       /* split over the full string: the previous strnlen(string, 32)
+        * bound silently truncated option strings longer than 32 chars */
+       n_tokens = rte_strsplit(string, strlen(string), tokens, n_vals, separator);
+
+       for(i = 0; i < n_tokens; i++) {
+               opt_vals[i] = (uint32_t)atol(tokens[i]);
+       }
+
+       free(string);
+
+       return n_tokens;
+}
+
+/* Parse the --rsz "A, B, C" option into ring_conf.
+ * Returns 0 on success, -1 on error. */
+static int
+app_parse_ring_conf(const char *conf_str)
+{
+       int ret;
+       uint32_t vals[3];
+
+       ret = app_parse_opt_vals(conf_str, ',', 3, vals);
+       if (ret != 3)
+               /* always report failure: returning ret directly reported
+                * success when zero values were parsed (ret == 0) */
+               return -1;
+
+       ring_conf.rx_size = vals[0];
+       ring_conf.ring_size = vals[1];
+       ring_conf.tx_size = vals[2];
+
+       return 0;
+}
+
+/* Parse the --rth "A, B, C" option into rx_thresh.
+ * Returns 0 on success, -1 on error. */
+static int
+app_parse_rth_conf(const char *conf_str)
+{
+       int ret;
+       uint32_t vals[3];
+
+       ret = app_parse_opt_vals(conf_str, ',', 3, vals);
+       if (ret != 3)
+               /* always report failure: returning ret directly reported
+                * success when zero values were parsed (ret == 0) */
+               return -1;
+
+       rx_thresh.pthresh = (uint8_t)vals[0];
+       rx_thresh.hthresh = (uint8_t)vals[1];
+       rx_thresh.wthresh = (uint8_t)vals[2];
+
+       return 0;
+}
+
+/* Parse the --tth "A, B, C" option into tx_thresh.
+ * Returns 0 on success, -1 on error. */
+static int
+app_parse_tth_conf(const char *conf_str)
+{
+       int ret;
+       uint32_t vals[3];
+
+       ret = app_parse_opt_vals(conf_str, ',', 3, vals);
+       if (ret != 3)
+               /* always report failure: returning ret directly reported
+                * success when zero values were parsed (ret == 0) */
+               return -1;
+
+       tx_thresh.pthresh = (uint8_t)vals[0];
+       tx_thresh.hthresh = (uint8_t)vals[1];
+       tx_thresh.wthresh = (uint8_t)vals[2];
+
+       return 0;
+}
+
+/* Parse a --pfc "RX PORT, TX PORT, RX LCORE, WT LCORE[, TX LCORE]" option.
+ * Registers the ports and cores in the global usage masks and appends the
+ * flow to qos_conf[]. Returns 0 on success, -1 on error. */
+static int
+app_parse_flow_conf(const char *conf_str)
+{
+       int ret;
+       /* six slots: app_parse_opt_vals() below may store up to 6 values;
+        * the previous vals[5] array overflowed on a 6-value input */
+       uint32_t vals[6];
+       struct flow_conf *pconf;
+       uint64_t mask;
+
+       /* ask for up to 6 values so that over-long input is detected */
+       ret = app_parse_opt_vals(conf_str, ',', 6, vals);
+       if (ret < 4 || ret > 5)
+               /* always report failure (ret == 0 previously meant success) */
+               return -1;
+
+       pconf = &qos_conf[nb_pfc];
+
+       pconf->rx_port = (uint8_t)vals[0];
+       pconf->tx_port = (uint8_t)vals[1];
+       pconf->rx_core = (uint8_t)vals[2];
+       pconf->wt_core = (uint8_t)vals[3];
+       if (ret == 5)
+               pconf->tx_core = (uint8_t)vals[4];
+       else
+               pconf->tx_core = pconf->wt_core; /* default: TX on the worker core */
+
+       if (pconf->rx_core == pconf->wt_core) {
+               RTE_LOG(ERR, APP, "pfc %u: rx thread and worker thread cannot share same core\n", nb_pfc);
+               return -1;
+       }
+
+       /* %u matches the default-promoted uint8_t arguments (%hu did not) */
+       if (pconf->rx_port >= RTE_MAX_ETHPORTS) {
+               RTE_LOG(ERR, APP, "pfc %u: invalid rx port %u index\n", nb_pfc, pconf->rx_port);
+               return -1;
+       }
+       if (pconf->tx_port >= RTE_MAX_ETHPORTS) {
+               /* log the offending TX port (previously printed rx_port) */
+               RTE_LOG(ERR, APP, "pfc %u: invalid tx port %u index\n", nb_pfc, pconf->tx_port);
+               return -1;
+       }
+
+       /* 1ULL keeps the shift 64-bit wide on 32-bit targets ("1lu" is
+        * only 32 bits when long is 32-bit) */
+       mask = 1ULL << pconf->rx_port;
+       if (app_used_rx_port_mask & mask) {
+               RTE_LOG(ERR, APP, "pfc %u: rx port %u is used already\n", nb_pfc, pconf->rx_port);
+               return -1;
+       }
+       app_used_rx_port_mask |= mask;
+       app_used_port_mask |= mask;
+
+       mask = 1ULL << pconf->tx_port;
+       if (app_used_tx_port_mask & mask) {
+               RTE_LOG(ERR, APP, "pfc %u: port %u is used already\n", nb_pfc, pconf->tx_port);
+               return -1;
+       }
+       app_used_tx_port_mask |= mask;
+       app_used_port_mask |= mask;
+
+       /* NOTE(review): core indexes are not range-checked here; an index
+        * >= 64 would still make these shifts undefined -- they are only
+        * validated later in app_parse_args(). Confirm acceptable. */
+       mask = 1ULL << pconf->rx_core;
+       app_used_core_mask |= mask;
+
+       mask = 1ULL << pconf->wt_core;
+       app_used_core_mask |= mask;
+
+       mask = 1ULL << pconf->tx_core;
+       app_used_core_mask |= mask;
+
+       nb_pfc++;
+
+       return 0;
+}
+
+/* Parse the --bsz "A, B, C, D" option into burst_conf.
+ * Returns 0 on success, -1 on error. */
+static int
+app_parse_burst_conf(const char *conf_str)
+{
+       int ret;
+       uint32_t vals[4];
+
+       ret = app_parse_opt_vals(conf_str, ',', 4, vals);
+       if (ret != 4)
+               /* always report failure: returning ret directly reported
+                * success when zero values were parsed (ret == 0) */
+               return -1;
+
+       burst_conf.rx_burst    = (uint16_t)vals[0];
+       burst_conf.ring_burst  = (uint16_t)vals[1];
+       burst_conf.qos_dequeue = (uint16_t)vals[2];
+       burst_conf.tx_burst    = (uint16_t)vals[3];
+
+       return 0;
+}
+
+/*
+ * Parse the application command line (after letting the EAL consume its
+ * own arguments), build the mask of used cores and validate it against
+ * the core mask the EAL was started with.
+ *
+ * Returns 0 on success, -1 on any configuration error.
+ */
+int
+app_parse_args(int argc, char **argv)
+{
+       int opt, ret;
+       int option_index;
+       const char *optname;
+       char *prgname = argv[0];
+       uint32_t i, nb_lcores;
+
+       static struct option lgopts[] = {
+               { "pfc", 1, 0, 0 },
+               { "mst", 1, 0, 0 },
+               { "rsz", 1, 0, 0 },
+               { "bsz", 1, 0, 0 },
+               { "rth", 1, 0, 0 },
+               { "tth", 1, 0, 0 },
+               { "cfg", 1, 0, 0 },
+               { NULL,  0, 0, 0 }
+       };
+
+       /* initialize EAL first */
+       ret = rte_eal_init(argc, argv);
+       if (ret < 0)
+               return -1;
+
+       argc -= ret;
+       argv += ret;
+
+       /* set en_US locale to print big numbers with ',' */
+       setlocale(LC_NUMERIC, "en_US.utf-8");
+
+       while ((opt = getopt_long(argc, argv, "",
+               lgopts, &option_index)) != EOF) {
+
+                       switch (opt) {
+                       /* long options */
+                       case 0:
+                               optname = lgopts[option_index].name;
+                               if (str_is(optname, "pfc")) {
+                                       ret = app_parse_flow_conf(optarg);
+                                       if (ret) {
+                                               RTE_LOG(ERR, APP, "Invalid pipe configuration %s\n", optarg);
+                                               return -1;
+                                       }
+                                       break;
+                               }
+                               if (str_is(optname, "mst")) {
+                                       app_master_core = (uint32_t)atoi(optarg);
+                                       break;
+                               }
+                               if (str_is(optname, "rsz")) {
+                                       ret = app_parse_ring_conf(optarg);
+                                       if (ret) {
+                                               RTE_LOG(ERR, APP, "Invalid ring configuration %s\n", optarg);
+                                               return -1;
+                                       }
+                                       break;
+                               }
+                               if (str_is(optname, "bsz")) {
+                                       ret = app_parse_burst_conf(optarg);
+                                       if (ret) {
+                                               RTE_LOG(ERR, APP, "Invalid burst configuration %s\n", optarg);
+                                               return -1;
+                                       }
+                                       break;
+                               }
+                               if (str_is(optname, "rth")) {
+                                       ret = app_parse_rth_conf(optarg);
+                                       if (ret) {
+                                               RTE_LOG(ERR, APP, "Invalid RX threshold configuration %s\n", optarg);
+                                               return -1;
+                                       }
+                                       break;
+                               }
+                               if (str_is(optname, "tth")) {
+                                       ret = app_parse_tth_conf(optarg);
+                                       if (ret) {
+                                               RTE_LOG(ERR, APP, "Invalid TX threshold configuration %s\n", optarg);
+                                               return -1;
+                                       }
+                                       break;
+                               }
+                               if (str_is(optname, "cfg")) {
+                                       cfg_profile = optarg;
+                                       break;
+                               }
+                               break;
+
+                       default:
+                               app_usage(prgname);
+                               return -1;
+                       }
+       }
+
+       /* The master core must not be shared with any packet-flow lcore.
+        * A single test replaces the previous loop, which re-tested the
+        * very same bit app_master_core + 1 times. 1ULL keeps the shift
+        * defined for lcore indexes >= 32 ("1u <<" was UB there). */
+       if (app_used_core_mask & (1ULL << app_master_core)) {
+               RTE_LOG(ERR, APP, "Master core index is not configured properly\n");
+               app_usage(prgname);
+               return -1;
+       }
+       app_used_core_mask |= 1ULL << app_master_core;
+
+       if ((app_used_core_mask != app_eal_core_mask()) ||
+                       (app_master_core != rte_get_master_lcore())) {
+               RTE_LOG(ERR, APP, "EAL core mask not configured properly, must be %" PRIx64
+                               " instead of %" PRIx64 "\n" , app_used_core_mask, app_eal_core_mask());
+               return -1;
+       }
+
+       if (nb_pfc == 0) {
+               RTE_LOG(ERR, APP, "Packet flow not configured!\n");
+               app_usage(prgname);
+               return -1;
+       }
+
+       /* sanity check for cores assignment */
+       nb_lcores = app_cpu_core_count();
+
+       for(i = 0; i < nb_pfc; i++) {
+               if (qos_conf[i].rx_core >= nb_lcores) {
+                       RTE_LOG(ERR, APP, "pfc %u: invalid RX lcore index %u\n", i + 1,
+                                       qos_conf[i].rx_core);
+                       return -1;
+               }
+               if (qos_conf[i].wt_core >= nb_lcores) {
+                       RTE_LOG(ERR, APP, "pfc %u: invalid WT lcore index %u\n", i + 1,
+                                       qos_conf[i].wt_core);
+                       return -1;
+               }
+               /* RX and worker lcore must sit on the same NUMA socket */
+               uint32_t rx_sock = rte_lcore_to_socket_id(qos_conf[i].rx_core);
+               uint32_t wt_sock = rte_lcore_to_socket_id(qos_conf[i].wt_core);
+               if (rx_sock != wt_sock) {
+                       RTE_LOG(ERR, APP, "pfc %u: RX and WT must be on the same socket\n", i + 1);
+                       return -1;
+               }
+               app_numa_mask |= 1 << rx_sock;
+       }
+
+       return 0;
+}
+
diff --git a/examples/qos_sched/cfg_file.c b/examples/qos_sched/cfg_file.c
new file mode 100755 (executable)
index 0000000..85f9c02
--- /dev/null
@@ -0,0 +1,631 @@
+/*-
+ *   BSD LICENSE
+ * 
+ *   Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ * 
+ *   Redistribution and use in source and binary forms, with or without 
+ *   modification, are permitted provided that the following conditions 
+ *   are met:
+ * 
+ *     * Redistributions of source code must retain the above copyright 
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright 
+ *       notice, this list of conditions and the following disclaimer in 
+ *       the documentation and/or other materials provided with the 
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its 
+ *       contributors may be used to endorse or promote products derived 
+ *       from this software without specific prior written permission.
+ * 
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * 
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <rte_string_fns.h>
+#include <rte_sched.h>
+
+#include "cfg_file.h"
+#include "main.h"
+
+
+/** when we resize a file structure, how many extra entries
+ * for new sections do we add in */
+#define CFG_ALLOC_SECTION_BATCH 8
+/** when we resize a section structure, how many extra entries
+ * for new entries do we add in */
+#define CFG_ALLOC_ENTRY_BATCH 16
+
+/* Remove leading and trailing whitespace from str (of length len) in
+ * place; returns the new length. Characters are cast to unsigned char
+ * before isspace(): passing a negative plain-char value to the ctype
+ * functions is undefined behaviour (CERT STR37-C). */
+static unsigned
+_strip(char *str, unsigned len)
+{
+       unsigned newlen = len;
+       unsigned i, start;
+
+       if (len == 0)
+               return 0;
+
+       /* strip trailing whitespace */
+       while (newlen > 0 && isspace((unsigned char)str[newlen - 1]))
+               str[--newlen] = '\0';
+
+       /* strip leading whitespace (bounds check before dereference) */
+       start = 0;
+       while (start < newlen && isspace((unsigned char)str[start]))
+               start++;
+       if (start > 0) {
+               newlen -= start;
+               for (i = 0; i < newlen; i++)
+                       str[i] = str[i + start];
+               str[i] = '\0';
+       }
+       return newlen;
+}
+
+/* Load an ini-style configuration file into a newly allocated cfg_file.
+ * Lines after ';' are comments; "[name]" starts a section; "key=value"
+ * adds an entry to the current section.
+ * Returns the parsed structure (free with cfg_close()) or NULL on error.
+ *
+ * num_sections and each section's num_entries are kept up to date at
+ * every step so the error path can hand a partially built structure to
+ * cfg_close(); previously they were read uninitialized on failure, and
+ * an empty input file indexed sections[-1]. */
+struct cfg_file *
+cfg_load(const char *filename, int flags)
+{
+       int allocated_sections = CFG_ALLOC_SECTION_BATCH;
+       int allocated_entries = 0;
+       int curr_section = -1;
+       int curr_entry = -1;
+       char buffer[256];
+       int lineno = 0;
+       struct cfg_file *cfg = NULL;
+
+       FILE *f = fopen(filename, "r");
+       if (f == NULL)
+               return NULL;
+
+       cfg = malloc(sizeof(*cfg) + sizeof(cfg->sections[0]) * allocated_sections);
+       if (cfg == NULL)
+               goto error2;
+
+       memset(cfg->sections, 0, sizeof(cfg->sections[0]) * allocated_sections);
+       cfg->num_sections = 0;
+
+       while (fgets(buffer, sizeof(buffer), f) != NULL) {
+               char *pos = NULL;
+               size_t len = strnlen(buffer, sizeof(buffer));
+               lineno++;
+               if (len >= sizeof(buffer) - 1 && buffer[len-1] != '\n') {
+                       printf("Error line %d - no \\n found on string. "
+                                       "Check if line too long\n", lineno);
+                       goto error1;
+               }
+               /* drop comments: everything after ';' is ignored */
+               if ((pos = memchr(buffer, ';', sizeof(buffer))) != NULL) {
+                       *pos = '\0';
+                       len = pos - buffer;
+               }
+
+               len = _strip(buffer, len);
+               if (buffer[0] != '[' && memchr(buffer, '=', len) == NULL)
+                       continue;
+
+               if (buffer[0] == '[') {
+                       /* section heading line */
+                       char *end = memchr(buffer, ']', len);
+                       if (end == NULL) {
+                               /* error message previously said '[' */
+                               printf("Error line %d - no terminating ']' found\n", lineno);
+                               goto error1;
+                       }
+                       *end = '\0';
+                       _strip(&buffer[1], end - &buffer[1]);
+
+                       curr_section++;
+
+                       /* resize overall struct if we don't have room for more sections */
+                       if (curr_section == allocated_sections) {
+                               allocated_sections += CFG_ALLOC_SECTION_BATCH;
+                               struct cfg_file *n_cfg = realloc(cfg, sizeof(*cfg) +
+                                               sizeof(cfg->sections[0]) * allocated_sections);
+                               if (n_cfg == NULL) {
+                                       printf("Error - no more memory\n");
+                                       goto error1;
+                               }
+                               cfg = n_cfg;
+                       }
+
+                       /* allocate space for new section */
+                       allocated_entries = CFG_ALLOC_ENTRY_BATCH;
+                       curr_entry = -1;
+                       cfg->sections[curr_section] = malloc(sizeof(*cfg->sections[0]) +
+                                       sizeof(cfg->sections[0]->entries[0]) * allocated_entries);
+                       if (cfg->sections[curr_section] == NULL) {
+                               printf("Error - no more memory\n");
+                               goto error1;
+                       }
+                       /* register the section only once it is allocated */
+                       cfg->sections[curr_section]->num_entries = 0;
+                       cfg->num_sections = curr_section + 1;
+
+                       rte_snprintf(cfg->sections[curr_section]->name,
+                                       sizeof(cfg->sections[0]->name),
+                                       "%s", &buffer[1]);
+               }
+               else {
+                       /* value line */
+                       if (curr_section < 0) {
+                               printf("Error line %d - value outside of section\n", lineno);
+                               goto error1;
+                       }
+
+                       struct cfg_section *sect = cfg->sections[curr_section];
+                       char *split[2];
+                       if (rte_strsplit(buffer, sizeof(buffer), split, 2, '=') != 2) {
+                               printf("Error at line %d - cannot split string\n", lineno);
+                               goto error1;
+                       }
+
+                       curr_entry++;
+                       if (curr_entry == allocated_entries) {
+                               allocated_entries += CFG_ALLOC_ENTRY_BATCH;
+                               struct cfg_section *n_sect = realloc(sect, sizeof(*sect) +
+                                               sizeof(sect->entries[0]) * allocated_entries);
+                               if (n_sect == NULL) {
+                                       printf("Error - no more memory\n");
+                                       goto error1;
+                               }
+                               sect = cfg->sections[curr_section] = n_sect;
+                       }
+
+                       sect->entries[curr_entry] = malloc(sizeof(*sect->entries[0]));
+                       if (sect->entries[curr_entry] == NULL) {
+                               printf("Error - no more memory\n");
+                               goto error1;
+                       }
+
+                       struct cfg_entry *entry = sect->entries[curr_entry];
+                       rte_snprintf(entry->name, sizeof(entry->name), "%s", split[0]);
+                       rte_snprintf(entry->value, sizeof(entry->value), "%s", split[1]);
+                       _strip(entry->name, strnlen(entry->name, sizeof(entry->name)));
+                       _strip(entry->value, strnlen(entry->value, sizeof(entry->value)));
+                       /* keep the count current so cleanup frees every
+                        * entry allocated so far */
+                       sect->num_entries = curr_entry + 1;
+               }
+       }
+       fclose(f);
+       cfg->flags = flags;
+       return cfg;
+
+error1:
+       cfg_close(cfg);
+error2:
+       fclose(f);
+       return NULL;
+}
+
+
+/* Release a cfg_file and every section/entry it owns.
+ * Returns 0 on success, -1 when cfg is NULL. */
+int cfg_close(struct cfg_file *cfg)
+{
+       int i, j;
+
+       if (cfg == NULL)
+               return -1;
+
+       for (i = 0; i < cfg->num_sections; i++) {
+               struct cfg_section *sect = cfg->sections[i];
+
+               if (sect == NULL)
+                       continue;
+               /* free(NULL) is a no-op, so no per-entry guard is needed */
+               for (j = 0; j < sect->num_entries; j++)
+                       free(sect->entries[j]);
+               free(sect);
+       }
+       free(cfg);
+
+       return 0;
+}
+
+/* Count the sections whose name matches sectionname over its first
+ * length characters. */
+int
+cfg_num_sections(struct cfg_file *cfg, const char *sectionname, size_t length)
+{
+       int count = 0;
+       int idx;
+
+       for (idx = 0; idx < cfg->num_sections; idx++)
+               if (strncmp(cfg->sections[idx]->name, sectionname, length) == 0)
+                       count++;
+
+       return count;
+}
+
+/* Copy up to max_sections section names into the caller-provided array;
+ * returns the number of names copied. */
+int
+cfg_sections(struct cfg_file *cfg, char *sections[], int max_sections)
+{
+       int n = 0;
+
+       while (n < cfg->num_sections && n < max_sections) {
+               rte_snprintf(sections[n], CFG_NAME_LEN, "%s",  cfg->sections[n]->name);
+               n++;
+       }
+       return n;
+}
+
+/* Look up a section by name; returns NULL when no section matches. */
+static const struct cfg_section *
+_get_section(struct cfg_file *cfg, const char *sectionname)
+{
+       int idx;
+
+       for (idx = 0; idx < cfg->num_sections; idx++) {
+               const struct cfg_section *sect = cfg->sections[idx];
+
+               if (strncmp(sect->name, sectionname,
+                               sizeof(cfg->sections[0]->name)) == 0)
+                       return sect;
+       }
+       return NULL;
+}
+
+/* Returns 1 when a section with the given name exists, 0 otherwise. */
+int
+cfg_has_section(struct cfg_file *cfg, const char *sectionname)
+{
+       return _get_section(cfg, sectionname) == NULL ? 0 : 1;
+}
+
+/* Returns the number of entries in the named section, or -1 when the
+ * section does not exist. */
+int
+cfg_section_num_entries(struct cfg_file *cfg, const char *sectionname)
+{
+       const struct cfg_section *sect = _get_section(cfg, sectionname);
+
+       return (sect != NULL) ? sect->num_entries : -1;
+}
+
+
+/* Copy up to max_entries entries of the named section into the
+ * caller-provided array. Returns the number copied, or -1 when the
+ * section does not exist. */
+int
+cfg_section_entries(struct cfg_file *cfg, const char *sectionname,
+               struct cfg_entry *entries, int max_entries)
+{
+       const struct cfg_section *sect = _get_section(cfg, sectionname);
+       int n = 0;
+
+       if (sect == NULL)
+               return -1;
+       while (n < max_entries && n < sect->num_entries) {
+               entries[n] = *sect->entries[n];
+               n++;
+       }
+       return n;
+}
+
+/* Returns the value of entryname inside the named section, or NULL when
+ * either the section or the entry is missing. */
+const char *
+cfg_get_entry(struct cfg_file *cfg, const char *sectionname,
+               const char *entryname)
+{
+       const struct cfg_section *sect = _get_section(cfg, sectionname);
+       int idx;
+
+       if (sect == NULL)
+               return NULL;
+       for (idx = 0; idx < sect->num_entries; idx++) {
+               const struct cfg_entry *entry = sect->entries[idx];
+
+               if (strncmp(entry->name, entryname, CFG_NAME_LEN) == 0)
+                       return entry->value;
+       }
+       return NULL;
+}
+
+/* Returns 1 when the named section contains entryname, 0 otherwise. */
+int
+cfg_has_entry(struct cfg_file *cfg, const char *sectionname,
+               const char *entryname)
+{
+       return cfg_get_entry(cfg, sectionname, entryname) == NULL ? 0 : 1;
+}
+
+
+/* Load the [port] section (and, when RTE_SCHED_RED is enabled, the [red]
+ * section) of a profile file into *port_params.  Entries absent from the
+ * file leave the corresponding field at its compiled-in default.
+ * Returns 0 on success, -1 if either argument is NULL.
+ */
+int
+cfg_load_port(struct cfg_file *cfg, struct rte_sched_port_params *port_params)
+{
+       const char *entry;
+       int j;
+
+       if (!cfg || !port_params)
+               return -1;
+
+       entry = cfg_get_entry(cfg, "port", "frame overhead");
+       if (entry)
+               port_params->frame_overhead = (uint32_t)atoi(entry);
+
+       entry = cfg_get_entry(cfg, "port", "number of subports per port");
+       if (entry)
+               port_params->n_subports_per_port = (uint32_t)atoi(entry);
+       
+       entry = cfg_get_entry(cfg, "port", "number of pipes per subport");
+       if (entry)
+               port_params->n_pipes_per_subport = (uint32_t)atoi(entry);
+
+       /* "queue sizes" is a whitespace-separated list, one value per TC */
+       entry = cfg_get_entry(cfg, "port", "queue sizes");
+       if (entry) {
+               char *next;
+               
+               for(j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) {
+                       port_params->qsize[j] = (uint16_t)strtol(entry, &next, 10);
+                       /* NOTE(review): strtol() never stores NULL in 'next', so
+                        * this break is dead code; a failed parse is signalled by
+                        * next == entry instead — TODO confirm the intended check.
+                        * The same pattern repeats in the RED loops below. */
+                       if (next == NULL)
+                               break;
+                       entry = next;
+               }
+       }
+
+#ifdef RTE_SCHED_RED
+       /* For each traffic class, read the WRED parameter vectors from the
+        * [red] section; each entry holds one value per packet colour. */
+       for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) {
+               char str[32];
+
+               /* Parse WRED min thresholds */
+               rte_snprintf(str, sizeof(str), "tc %d wred min", j);
+               entry = cfg_get_entry(cfg, "red", str);
+               if (entry) {
+                       char *next;
+                       int k;
+                       /* for each packet colour (green, yellow, red) */
+                       for (k = 0; k < e_RTE_METER_COLORS; k++) {
+                               port_params->red_params[j][k].min_th
+                                       = (uint16_t)strtol(entry, &next, 10);
+                               if (next == NULL)
+                                       break;
+                               entry = next;
+                       }
+               }
+
+               /* Parse WRED max thresholds */
+               rte_snprintf(str, sizeof(str), "tc %d wred max", j);
+               entry = cfg_get_entry(cfg, "red", str);
+               if (entry) {
+                       char *next;
+                       int k;
+                       /* for each packet colour (green, yellow, red) */
+                       for (k = 0; k < e_RTE_METER_COLORS; k++) {
+                               port_params->red_params[j][k].max_th
+                                       = (uint16_t)strtol(entry, &next, 10);
+                               if (next == NULL)
+                                       break;
+                               entry = next;
+                       }
+               }
+
+               /* Parse WRED inverse mark probabilities */
+               rte_snprintf(str, sizeof(str), "tc %d wred inv prob", j);
+               entry = cfg_get_entry(cfg, "red", str);
+               if (entry) {
+                       char *next;
+                       int k;
+                       /* for each packet colour (green, yellow, red) */
+                       for (k = 0; k < e_RTE_METER_COLORS; k++) {
+                               port_params->red_params[j][k].maxp_inv
+                                       = (uint8_t)strtol(entry, &next, 10);
+
+                               if (next == NULL)
+                                       break;
+                               entry = next;
+                       }
+               }
+
+               /* Parse WRED EWMA filter weights */
+               rte_snprintf(str, sizeof(str), "tc %d wred weight", j);
+               entry = cfg_get_entry(cfg, "red", str);
+               if (entry) {
+                       char *next;
+                       int k;
+                       /* for each packet colour (green, yellow, red) */
+                       for (k = 0; k < e_RTE_METER_COLORS; k++) {
+                               port_params->red_params[j][k].wq_log2
+                                       = (uint8_t)strtol(entry, &next, 10);
+                               if (next == NULL)
+                                       break;
+                               entry = next;
+                       }
+               }
+       }
+#endif /* RTE_SCHED_RED */
+       
+       return 0;
+}
+
+/* Load every "pipe profile N" section of the config into pipe_params[N]
+ * and record the profile count in the global port_params (defined in
+ * init.c — this couples the parser to the application globals).
+ * NOTE(review): the caller's pipe_params array size is never checked
+ * against 'profiles' — TODO confirm it cannot exceed
+ * RTE_SCHED_PIPE_PROFILES_PER_PORT.
+ * Returns 0 on success, -1 if either argument is NULL.
+ */
+int
+cfg_load_pipe(struct cfg_file *cfg, struct rte_sched_pipe_params *pipe_params)
+{
+       int i, j;
+       char *next;
+       const char *entry;
+       int profiles;
+
+       if (!cfg || !pipe_params)
+               return -1;
+
+       /* count the sections whose name begins with "pipe profile" */
+       profiles = cfg_num_sections(cfg, "pipe profile", sizeof("pipe profile") - 1);
+       port_params.n_pipe_profiles = profiles;
+
+       for (j = 0; j < profiles; j++) {
+               char pipe_name[32];
+               rte_snprintf(pipe_name, sizeof(pipe_name), "pipe profile %d", j);
+
+               entry = cfg_get_entry(cfg, pipe_name, "tb rate");
+               if (entry)
+                       pipe_params[j].tb_rate = (uint32_t)atoi(entry);
+
+               entry = cfg_get_entry(cfg, pipe_name, "tb size");
+               if (entry)
+                       pipe_params[j].tb_size = (uint32_t)atoi(entry);
+
+               entry = cfg_get_entry(cfg, pipe_name, "tc period");
+               if (entry)
+                       pipe_params[j].tc_period = (uint32_t)atoi(entry);
+
+               entry = cfg_get_entry(cfg, pipe_name, "tc 0 rate");
+               if (entry)
+                       pipe_params[j].tc_rate[0] = (uint32_t)atoi(entry);
+                       
+               entry = cfg_get_entry(cfg, pipe_name, "tc 1 rate");
+               if (entry)
+                       pipe_params[j].tc_rate[1] = (uint32_t)atoi(entry);
+                       
+               entry = cfg_get_entry(cfg, pipe_name, "tc 2 rate");
+               if (entry)
+                       pipe_params[j].tc_rate[2] = (uint32_t)atoi(entry);
+                       
+               entry = cfg_get_entry(cfg, pipe_name, "tc 3 rate");
+               if (entry)
+                       pipe_params[j].tc_rate[3] = (uint32_t)atoi(entry);
+
+#ifdef RTE_SCHED_SUBPORT_TC_OV
+               entry = cfg_get_entry(cfg, pipe_name, "tc 0 oversubscription weight");
+               if (entry)
+                       pipe_params[j].tc_ov_weight[0] = (uint8_t)atoi(entry);
+                       
+               entry = cfg_get_entry(cfg, pipe_name, "tc 1 oversubscription weight");
+               if (entry)
+                       pipe_params[j].tc_ov_weight[1] = (uint8_t)atoi(entry);
+                       
+               entry = cfg_get_entry(cfg, pipe_name, "tc 2 oversubscription weight");
+               if (entry)
+                       pipe_params[j].tc_ov_weight[2] = (uint8_t)atoi(entry);
+                       
+               entry = cfg_get_entry(cfg, pipe_name, "tc 3 oversubscription weight");
+               if (entry)
+                       pipe_params[j].tc_ov_weight[3] = (uint8_t)atoi(entry);
+#endif
+
+               /* "tc N wrr weights" lists one weight per queue of that TC.
+                * NOTE(review): the destination index stride is
+                * RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE while the loop bound is
+                * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; the layout is correct
+                * only while those two constants are equal — TODO confirm.
+                * Also, strtol() never stores NULL in 'next', so the breaks
+                * below are dead code (parse failure would be next == entry). */
+               entry = cfg_get_entry(cfg, pipe_name, "tc 0 wrr weights");
+               if (entry) {
+                       for(i = 0; i < RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; i++) {
+                               pipe_params[j].wrr_weights[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE*0 + i] =
+                                       (uint8_t)strtol(entry, &next, 10);
+                               if (next == NULL)
+                                       break;
+                               entry = next;
+                       }
+               }
+               entry = cfg_get_entry(cfg, pipe_name, "tc 1 wrr weights");
+               if (entry) {
+                       for(i = 0; i < RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; i++) {
+                               pipe_params[j].wrr_weights[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE*1 + i] =
+                                       (uint8_t)strtol(entry, &next, 10);
+                               if (next == NULL)
+                                       break;
+                               entry = next;
+                       }
+               }
+               entry = cfg_get_entry(cfg, pipe_name, "tc 2 wrr weights");
+               if (entry) {
+                       for(i = 0; i < RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; i++) {
+                               pipe_params[j].wrr_weights[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE*2 + i] =
+                                       (uint8_t)strtol(entry, &next, 10);
+                               if (next == NULL)
+                                       break;
+                               entry = next;
+                       }
+               }
+               entry = cfg_get_entry(cfg, pipe_name, "tc 3 wrr weights");
+               if (entry) {
+                       for(i = 0; i < RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; i++) {
+                               pipe_params[j].wrr_weights[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE*3 + i] =
+                                       (uint8_t)strtol(entry, &next, 10);
+                               if (next == NULL)
+                                       break;
+                               entry = next;
+                       }
+               }
+       }
+       return 0;
+}
+
+/* Load every "subport N" section into subport_params[N] and build the
+ * global app_pipe_to_profile map from that section's "pipe X" /
+ * "pipe X-Y" entries (pipes not mentioned keep -1, i.e. unconfigured).
+ * Returns 0 on success, -1 on bad arguments or an out-of-range pipe spec;
+ * exits the process if an entry references a nonexistent pipe profile.
+ */
+int
+cfg_load_subport(struct cfg_file *cfg, struct rte_sched_subport_params *subport_params)
+{
+       const char *entry;
+       int i, j, k;
+
+       if (!cfg || !subport_params)
+               return -1;
+
+       /* -1 marks "no profile assigned" for every (subport, pipe) slot */
+       memset(app_pipe_to_profile, -1, sizeof(app_pipe_to_profile));
+
+       for (i = 0; i < MAX_SCHED_SUBPORTS; i++) {
+               char sec_name[CFG_NAME_LEN];
+               rte_snprintf(sec_name, sizeof(sec_name), "subport %d", i);
+
+               if (cfg_has_section(cfg, sec_name)) {
+                       entry = cfg_get_entry(cfg, sec_name, "tb rate");
+                       if (entry)
+                               subport_params[i].tb_rate = (uint32_t)atoi(entry);
+
+                       entry = cfg_get_entry(cfg, sec_name, "tb size");
+                       if (entry)
+                               subport_params[i].tb_size = (uint32_t)atoi(entry);
+
+                       entry = cfg_get_entry(cfg, sec_name, "tc period");
+                       if (entry)
+                               subport_params[i].tc_period = (uint32_t)atoi(entry);
+
+#ifdef RTE_SCHED_SUBPORT_TC_OV
+                       entry = cfg_get_entry(cfg, sec_name, "tc oversubscription period");
+                       if (entry)
+                               subport_params[i].tc_ov_period = (uint32_t)atoi(entry);
+#endif
+
+                       entry = cfg_get_entry(cfg, sec_name, "tc 0 rate");
+                       if (entry)
+                               subport_params[i].tc_rate[0] = (uint32_t)atoi(entry);
+
+                       entry = cfg_get_entry(cfg, sec_name, "tc 1 rate");
+                       if (entry)
+                               subport_params[i].tc_rate[1] = (uint32_t)atoi(entry);
+
+                       entry = cfg_get_entry(cfg, sec_name, "tc 2 rate");
+                       if (entry)
+                               subport_params[i].tc_rate[2] = (uint32_t)atoi(entry);
+
+                       entry = cfg_get_entry(cfg, sec_name, "tc 3 rate");
+                       if (entry)
+                               subport_params[i].tc_rate[3] = (uint32_t)atoi(entry);
+
+                       /* NOTE(review): VLA sized by file content — a huge
+                        * section is stack-heavy and an empty one gives a
+                        * zero-length VLA (undefined behaviour) — TODO harden. */
+                       int n_entries = cfg_section_num_entries(cfg, sec_name);
+                       struct cfg_entry entries[n_entries];
+
+                       cfg_section_entries(cfg, sec_name, entries, n_entries);
+
+                       /* entries named "pipe X" or "pipe X-Y" map that pipe
+                        * (range) to the profile number in the entry value */
+                       for (j = 0; j < n_entries; j++) {
+                               if (strncmp("pipe", entries[j].name, sizeof("pipe") - 1) == 0) {
+                                       int profile;
+                                       char *tokens[2] = {NULL, NULL};
+                                       int n_tokens;
+                                       int begin, end;
+
+                                       profile = atoi(entries[j].value);
+                                       n_tokens = rte_strsplit(&entries[j].name[sizeof("pipe")],
+                                                       strnlen(entries[j].name, CFG_NAME_LEN), tokens, 2, '-');
+
+                                       /* NOTE(review): if rte_strsplit() yields no
+                                        * tokens, tokens[0] stays NULL and atoi(NULL)
+                                        * is undefined behaviour — TODO validate
+                                        * n_tokens >= 1.  'begin' is also never
+                                        * checked for < 0 before indexing below. */
+                                       begin =  atoi(tokens[0]);
+                                       if (n_tokens == 2)
+                                               end = atoi(tokens[1]);
+                                       else
+                                               end = begin;
+
+                                       if (end >= MAX_SCHED_PIPES || begin > end)
+                                               return -1;
+
+                                       for (k = begin; k <= end; k++) {
+                                               char profile_name[CFG_NAME_LEN];
+
+                                               rte_snprintf(profile_name, sizeof(profile_name),
+                                                               "pipe profile %d", profile);
+                                               if (cfg_has_section(cfg, profile_name))
+                                                       app_pipe_to_profile[i][k] = profile;
+                                               else
+                                                       rte_exit(EXIT_FAILURE, "Wrong pipe profile %s\n",
+                                                                       entries[j].value);
+
+                                       }
+                               }
+                       }
+               }
+       }
+
+       return 0;
+}
+
+
diff --git a/examples/qos_sched/cfg_file.h b/examples/qos_sched/cfg_file.h
new file mode 100755 (executable)
index 0000000..2e265e6
--- /dev/null
@@ -0,0 +1,103 @@
+/*-
+ *   BSD LICENSE
+ * 
+ *   Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ * 
+ *   Redistribution and use in source and binary forms, with or without 
+ *   modification, are permitted provided that the following conditions 
+ *   are met:
+ * 
+ *     * Redistributions of source code must retain the above copyright 
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright 
+ *       notice, this list of conditions and the following disclaimer in 
+ *       the documentation and/or other materials provided with the 
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its 
+ *       contributors may be used to endorse or promote products derived 
+ *       from this software without specific prior written permission.
+ * 
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * 
+ */
+
+/* NOTE(review): identifiers beginning with "__" are reserved for the
+ * implementation (C99 7.1.3) — TODO consider CFG_FILE_H_ instead. */
+#ifndef __CFG_FILE_H__
+#define __CFG_FILE_H__
+
+#include <rte_sched.h>
+
+#define CFG_NAME_LEN 32
+#define CFG_VALUE_LEN 64
+
+/* one "name = value" pair from a config-file section */
+struct cfg_entry {
+       char name[CFG_NAME_LEN];
+       char value[CFG_VALUE_LEN];
+};
+
+/* a "[name]" section and the pointers to its entries.
+ * NOTE(review): [0]-sized trailing arrays are a GNU extension; the C99
+ * flexible array member spelling "entries[]" is the portable form. */
+struct cfg_section {
+       char name[CFG_NAME_LEN];
+       int num_entries;
+       struct cfg_entry *entries[0];
+};
+
+/* a parsed config file: flags plus an array of section pointers */
+struct cfg_file {
+       int flags;
+       int num_sections;
+       struct cfg_section *sections[0];
+};
+
+
+/* overlay the [port]/[red] sections onto *port; 0 on success, -1 on NULL args */
+int cfg_load_port(struct cfg_file *cfg, struct rte_sched_port_params *port);
+
+/* load all "pipe profile N" sections into pipe[]; 0 on success, -1 on NULL args */
+int cfg_load_pipe(struct cfg_file *cfg, struct rte_sched_pipe_params *pipe);
+
+/* load all "subport N" sections into subport[]; 0 on success, -1 on error */
+int cfg_load_subport(struct cfg_file *cfg, struct rte_sched_subport_params *subport);
+
+
+/* reads a config file from disk and returns a handle to the config 
+ * 'flags' is reserved for future use and must be 0
+ */
+struct cfg_file *cfg_load(const char *filename, int flags);
+
+/* returns the number of sections in the config whose name matches the
+ * first 'length' characters of sec_name */
+int cfg_num_sections(struct cfg_file *cfg, const char *sec_name, size_t length);
+
+/* fills the array "sections" with the names of all the sections in the file
+ * (up to a max of max_sections).
+ * NOTE: buffers in the sections array must be at least CFG_NAME_LEN big
+ */
+int cfg_sections(struct cfg_file *cfg, char *sections[], int max_sections);
+
+/* true if the named section exists, false otherwise */
+int cfg_has_section(struct cfg_file *cfg, const char *sectionname);
+
+/* returns the number of entries in a section */
+int cfg_section_num_entries(struct cfg_file *cfg, const char *sectionname);
+
+/* returns the entries in a section as key-value pairs in the "entries" array */
+int cfg_section_entries(struct cfg_file *cfg, const char *sectionname,
+               struct cfg_entry *entries, int max_entries);
+
+/* returns a pointer to the value of the named entry in the named section */
+const char *cfg_get_entry(struct cfg_file *cfg, const char *sectionname,
+               const char *entryname);
+
+/* true if the given entry exists in the given section, false otherwise */
+int cfg_has_entry(struct cfg_file *cfg, const char *sectionname,
+               const char *entryname);
+
+/* cleans up memory allocated by cfg_load() */
+int cfg_close(struct cfg_file *cfg);
+
+#endif
diff --git a/examples/qos_sched/init.c b/examples/qos_sched/init.c
new file mode 100755 (executable)
index 0000000..1654c73
--- /dev/null
@@ -0,0 +1,385 @@
+/*-
+ *   BSD LICENSE
+ * 
+ *   Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ * 
+ *   Redistribution and use in source and binary forms, with or without 
+ *   modification, are permitted provided that the following conditions 
+ *   are met:
+ * 
+ *     * Redistributions of source code must retain the above copyright 
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright 
+ *       notice, this list of conditions and the following disclaimer in 
+ *       the documentation and/or other materials provided with the 
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its 
+ *       contributors may be used to endorse or promote products derived 
+ *       from this software without specific prior written permission.
+ * 
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * 
+ */
+
+#include <stdint.h>
+#include <memory.h>
+
+#include <rte_log.h>
+#include <rte_mbuf.h>
+#include <rte_debug.h>
+#include <rte_ethdev.h>
+#include <rte_mempool.h>
+#include <rte_sched.h>
+#include <rte_cycles.h>
+#include <rte_string_fns.h>
+
+#include "main.h"
+#include "cfg_file.h"
+
+uint32_t app_numa_mask = 0;
+static uint32_t app_inited_port_mask = 0;
+
+int app_pipe_to_profile[MAX_SCHED_SUBPORTS][MAX_SCHED_PIPES];
+
+#define MAX_NAME_LEN 32
+
+struct ring_conf ring_conf = {
+       .rx_size   = APP_RX_DESC_DEFAULT,
+       .ring_size = APP_RING_SIZE,
+       .tx_size   = APP_TX_DESC_DEFAULT,
+};
+
+struct burst_conf burst_conf = {
+       .rx_burst    = MAX_PKT_RX_BURST,
+       .ring_burst  = PKT_ENQUEUE,
+       .qos_dequeue = PKT_DEQUEUE,
+       .tx_burst    = MAX_PKT_TX_BURST,
+};
+
+struct ring_thresh rx_thresh = {
+       .pthresh = RX_PTHRESH,
+       .hthresh = RX_HTHRESH,
+       .wthresh = RX_WTHRESH,
+};
+
+struct ring_thresh tx_thresh = {
+       .pthresh = TX_PTHRESH,
+       .hthresh = TX_HTHRESH,
+       .wthresh = TX_WTHRESH,
+};
+
+uint32_t nb_pfc;
+const char *cfg_profile = NULL;
+struct flow_conf qos_conf[MAX_DATA_STREAMS];
+
+static const struct rte_eth_conf port_conf = {
+       .rxmode = {
+               .max_rx_pkt_len = ETHER_MAX_LEN,
+               .split_hdr_size = 0,
+               .header_split   = 0, /**< Header Split disabled */
+               .hw_ip_checksum = 0, /**< IP checksum offload disabled */
+               .hw_vlan_filter = 0, /**< VLAN filtering disabled */
+               .jumbo_frame    = 0, /**< Jumbo Frame Support disabled */
+               .hw_strip_crc   = 0, /**< CRC stripped by hardware */
+       },
+       .txmode = {
+               .mq_mode = ETH_DCB_NONE,
+       },
+};
+
+/* Configure and start Ethernet port 'portid' with a single RX and a single
+ * TX queue, then enable promiscuous mode.  A port shared by several flows
+ * is initialized only once (tracked via app_inited_port_mask).  Exits the
+ * process on any setup failure; returns 0 otherwise.
+ * NOTE(review): portid is uint8_t but is printed with %hu (unsigned short)
+ * throughout — works after default argument promotion, but %u or %hhu
+ * would be the matching specifier.
+ */
+static int
+app_init_port(uint8_t portid, struct rte_mempool *mp)
+{
+       int ret;
+       struct rte_eth_link link;
+       struct rte_eth_rxconf rx_conf;
+       struct rte_eth_txconf tx_conf;
+
+       /* check if port already initialized (multistream configuration) */
+       if (app_inited_port_mask & (1u << portid))
+               return 0;
+
+       /* NOTE(review): rx_conf/tx_conf are stack variables with only some
+        * fields assigned; any other fields hold indeterminate values —
+        * TODO memset both to 0 first. */
+       rx_conf.rx_thresh.pthresh = rx_thresh.pthresh;
+       rx_conf.rx_thresh.hthresh = rx_thresh.hthresh;
+       rx_conf.rx_thresh.wthresh = rx_thresh.wthresh;
+       rx_conf.rx_free_thresh = 32;
+       rx_conf.rx_drop_en = 0;
+
+       tx_conf.tx_thresh.pthresh = tx_thresh.pthresh;
+       tx_conf.tx_thresh.hthresh = tx_thresh.hthresh;
+       tx_conf.tx_thresh.wthresh = tx_thresh.wthresh;
+       tx_conf.tx_free_thresh = 0;
+       tx_conf.tx_rs_thresh = 0;
+       tx_conf.txq_flags = ETH_TXQ_FLAGS_NOMULTSEGS | ETH_TXQ_FLAGS_NOOFFLOADS;
+
+       /* init port */
+       RTE_LOG(INFO, APP, "Initializing port %hu... ", portid);
+       fflush(stdout);
+       ret = rte_eth_dev_configure(portid, 1, 1, &port_conf);
+       if (ret < 0)
+               rte_exit(EXIT_FAILURE, "Cannot configure device: err=%d, port=%hu\n",
+               ret, portid);
+
+       /* init one RX queue */
+       fflush(stdout);
+       ret = rte_eth_rx_queue_setup(portid, 0, (uint16_t)ring_conf.rx_size,
+               rte_eth_dev_socket_id(portid), &rx_conf, mp);
+       if (ret < 0)
+               /* NOTE(review): message says "tx_queue_setup" but this is the
+                * RX queue — copy/paste error in the error text */
+               rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup: err=%d, port=%hu\n",
+               ret, portid);
+       
+       /* init one TX queue */
+       fflush(stdout);
+       ret = rte_eth_tx_queue_setup(portid, 0,
+               (uint16_t)ring_conf.tx_size, rte_eth_dev_socket_id(portid), &tx_conf);
+       if (ret < 0)
+               rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup: err=%d, "
+               "port=%hu queue=%d\n",
+               ret, portid, 0);
+
+       /* Start device */
+       ret = rte_eth_dev_start(portid);
+       if (ret < 0)
+               rte_exit(EXIT_FAILURE, "rte_pmd_port_start: err=%d, port=%hu\n",
+               ret, portid);
+
+       printf("done: ");
+
+       /* get link status */
+       rte_eth_link_get(portid, &link);
+       if (link.link_status) {
+               /* NOTE(review): "half-duplex\n" embeds an extra newline even
+                * though the outer format string already ends with \n —
+                * half-duplex links print a blank line */
+               printf(" Link Up - speed %u Mbps - %s\n",
+                       (uint32_t) link.link_speed,
+                       (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
+                       ("full-duplex") : ("half-duplex\n"));
+       } else {
+               printf(" Link Down\n");
+       }
+       rte_eth_promiscuous_enable(portid);
+       
+       /* mark port as initialized */
+       app_inited_port_mask |= 1u << portid;
+       
+       return 0;
+}
+
+/* Default subport parameters: one subport at 1.25 GB/s (10 Gb/s) token
+ * bucket rate; remaining array slots are zero-initialized and normally
+ * filled in by cfg_load_subport() from the profile file. */
+static struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
+       {
+               .tb_rate = 1250000000,
+               .tb_size = 1000000,
+
+               .tc_rate = {1250000000, 1250000000, 1250000000, 1250000000},
+               .tc_period = 10,
+#ifdef RTE_SCHED_SUBPORT_TC_OV
+               .tc_ov_period = 10,
+#endif
+       },
+};
+
+/* Default pipe profile #0 (equal weights everywhere); further profiles
+ * come from cfg_load_pipe() when a profile file is supplied. */
+static struct rte_sched_pipe_params pipe_profiles[RTE_SCHED_PIPE_PROFILES_PER_PORT] = {
+       { /* Profile #0 */
+               .tb_rate = 305175,
+               .tb_size = 1000000,
+
+               .tc_rate = {305175, 305175, 305175, 305175}, 
+               .tc_period = 40,
+#ifdef RTE_SCHED_SUBPORT_TC_OV
+               .tc_ov_weight = {1, 1, 1, 1},
+#endif
+               
+               .wrr_weights = {1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1},
+       },
+};
+
+/* Global scheduler port defaults.  .name/.socket/.rate are overwritten at
+ * runtime by app_init_sched_port(); other fields may be overridden by
+ * cfg_load_port().  Also written by cfg_load_pipe() (n_pipe_profiles),
+ * which is why this is non-static. */
+struct rte_sched_port_params port_params = {
+       .name = "port_0",
+       .socket = 0, /* computed */
+       .rate = 0, /* computed */
+       .frame_overhead = RTE_SCHED_FRAME_OVERHEAD_DEFAULT,
+       .n_subports_per_port = 1,
+       .n_pipes_per_subport = 4096,
+       .qsize = {64, 64, 64, 64},
+       .pipe_profiles = pipe_profiles,
+       .n_pipe_profiles = 1,
+
+#ifdef RTE_SCHED_RED
+       .red_params = {
+               /* Traffic Class 0 Colors Green / Yellow / Red */
+               [0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+               [0][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+               [0][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+               /* Traffic Class 1 - Colors Green / Yellow / Red */
+               [1][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+               [1][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+               [1][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+               /* Traffic Class 2 - Colors Green / Yellow / Red */
+               [2][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+               [2][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+               [2][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+               /* Traffic Class 3 - Colors Green / Yellow / Red */
+               [3][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+               [3][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+               [3][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9}
+       }
+#endif /* RTE_SCHED_RED */
+};
+
+/* Build the hierarchical QoS scheduler (port -> subports -> pipes) for an
+ * Ethernet port.  The port rate is derived from the negotiated link speed
+ * (Mbps -> bytes/s) and pipes are mapped to profiles via the global
+ * app_pipe_to_profile table (-1 entries are left unconfigured).
+ * Exits the process on any configuration failure.
+ * NOTE(review): if the link is down, link.link_speed may be 0, giving a
+ * zero port rate — TODO confirm rte_sched_port_config() rejects that.
+ */
+static struct rte_sched_port *
+app_init_sched_port(uint32_t portid, uint32_t socketid)
+{
+       static char port_name[32]; /* static as referenced from global port_params*/
+       struct rte_eth_link link;
+       struct rte_sched_port *port = NULL;
+       uint32_t pipe, subport;
+       int err;
+
+       rte_eth_link_get((uint8_t)portid, &link);
+
+       port_params.socket = socketid;
+       port_params.rate = (uint64_t) link.link_speed * 1000 * 1000 / 8;
+       rte_snprintf(port_name, sizeof(port_name), "port_%d", portid);
+       port_params.name = port_name;
+
+       port = rte_sched_port_config(&port_params);
+       if (port == NULL){
+               rte_exit(EXIT_FAILURE, "Unable to config sched port\n");
+       }
+
+       for (subport = 0; subport < port_params.n_subports_per_port; subport ++) {
+               err = rte_sched_subport_config(port, subport, &subport_params[subport]);
+               if (err) {
+                       rte_exit(EXIT_FAILURE, "Unable to config sched subport %u, err=%d\n",
+                                       subport, err);
+               }
+       
+               /* configure only the pipes the profile file mapped */
+               for (pipe = 0; pipe < port_params.n_pipes_per_subport; pipe ++) {
+                       if (app_pipe_to_profile[subport][pipe] != -1) {
+                               err = rte_sched_pipe_config(port, subport, pipe,
+                                               app_pipe_to_profile[subport][pipe]);
+                               if (err) {
+                                       rte_exit(EXIT_FAILURE, "Unable to config sched pipe %u "
+                                                       "for profile %d, err=%d\n", pipe,
+                                                       app_pipe_to_profile[subport][pipe], err);
+                               }
+                       }
+               }
+       }
+       
+       return port;
+}
+
+/* Parse the optional scheduler profile file and overlay its settings onto
+ * the compiled-in defaults (port_params / subport_params / pipe_profiles).
+ * A NULL profile means "use defaults" and is not an error; a file that
+ * cannot be loaded terminates the process.
+ * NOTE(review): the return values of cfg_load_port/subport/pipe are
+ * ignored, so a malformed file can be silently half-applied.
+ */
+static int
+app_load_cfg_profile(const char *profile)
+{
+       if (profile == NULL)
+               return 0;
+       
+       struct cfg_file *cfg_file = cfg_load(profile, 0);
+       if (cfg_file == NULL)
+               rte_exit(EXIT_FAILURE, "Cannot load configuration profile %s\n", profile);
+
+       cfg_load_port(cfg_file, &port_params);
+       cfg_load_subport(cfg_file, subport_params);
+       cfg_load_pipe(cfg_file, pipe_profiles);
+
+       cfg_close(cfg_file);
+
+       return 0;
+}
+
+/* One-time application setup: initialize the PMDs, probe the PCI bus,
+ * overlay the optional config profile, then for every active flow create
+ * its RX->worker and worker->TX software rings and mbuf pool, bring up
+ * the RX/TX NIC ports and build the QoS scheduler port.  Exits the
+ * process on any failure; returns 0 otherwise.
+ */
+int app_init(void)
+{
+       uint32_t i;
+       char ring_name[MAX_NAME_LEN];
+       char pool_name[MAX_NAME_LEN];
+
+       /* init driver(s) */
+       if (rte_pmd_init_all() < 0)
+               rte_exit(EXIT_FAILURE, "Cannot init PMD\n");
+
+       if (rte_eal_pci_probe() < 0)
+               rte_exit(EXIT_FAILURE, "Cannot probe PCI\n");
+
+       if (rte_eth_dev_count() == 0)
+               rte_exit(EXIT_FAILURE, "No Ethernet port - bye\n");
+
+       /* load configuration profile */
+       if (app_load_cfg_profile(cfg_profile) != 0)
+               rte_exit(EXIT_FAILURE, "Invalid configuration profile\n");
+       
+       /* Initialize each active flow */
+       for(i = 0; i < nb_pfc; i++) {
+               /* NOTE(review): 'socket' is derived from the RX core and then
+                * used for BOTH rings; presumably the TX ring should sit on
+                * the TX core's socket — confirm intended NUMA placement. */
+               uint32_t socket = rte_lcore_to_socket_id(qos_conf[i].rx_core);
+               struct rte_ring *ring;
+
+               /* reuse an existing ring if another flow already created it */
+               rte_snprintf(ring_name, MAX_NAME_LEN, "ring-%u-%u", i, qos_conf[i].rx_core);
+               ring = rte_ring_lookup(ring_name);
+               if (ring == NULL)
+                       qos_conf[i].rx_ring = rte_ring_create(ring_name, ring_conf.ring_size,
+                               socket, RING_F_SP_ENQ | RING_F_SC_DEQ);
+               else
+                       qos_conf[i].rx_ring = ring;
+
+               rte_snprintf(ring_name, MAX_NAME_LEN, "ring-%u-%u", i, qos_conf[i].tx_core);
+               ring = rte_ring_lookup(ring_name);
+               if (ring == NULL)
+                       qos_conf[i].tx_ring = rte_ring_create(ring_name, ring_conf.ring_size,
+                               socket, RING_F_SP_ENQ | RING_F_SC_DEQ);
+               else
+                       qos_conf[i].tx_ring = ring;
+
+
+               /* create the mbuf pools for each RX Port */
+               rte_snprintf(pool_name, MAX_NAME_LEN, "mbuf_pool%u", i);
+               qos_conf[i].mbuf_pool = rte_mempool_create(pool_name, NB_MBUF, MBUF_SIZE,
+                                               burst_conf.rx_burst * 4,
+                                               sizeof(struct rte_pktmbuf_pool_private),
+                                               rte_pktmbuf_pool_init, NULL,
+                                               rte_pktmbuf_init, NULL,
+                                               rte_eth_dev_socket_id(qos_conf[i].rx_port),
+                                               0);
+               if (qos_conf[i].mbuf_pool == NULL)
+                       /* NOTE(review): 'i' is the flow index, not a socket id —
+                        * the message text is misleading */
+                       rte_exit(EXIT_FAILURE, "Cannot init mbuf pool for socket %u\n", i);
+
+               //printf("MP = %d\n", rte_mempool_count(qos_conf[i].app_pktmbuf_pool));
+
+               app_init_port(qos_conf[i].rx_port, qos_conf[i].mbuf_pool);
+               app_init_port(qos_conf[i].tx_port, qos_conf[i].mbuf_pool);
+               
+               qos_conf[i].sched_port = app_init_sched_port(qos_conf[i].rx_port, socket);
+       }
+
+       RTE_LOG(INFO, APP, "time stamp clock running at %" PRIu64 " Hz\n",
+                        rte_get_timer_hz());
+       
+       RTE_LOG(INFO, APP, "Ring sizes: NIC RX = %u, Mempool = %d SW queue = %u,"
+                        "NIC TX = %u\n", ring_conf.rx_size, NB_MBUF, ring_conf.ring_size,
+                        ring_conf.tx_size);
+
+       RTE_LOG(INFO, APP, "Burst sizes: RX read = %hu, RX write = %hu,\n"
+                                                 "             Worker read/QoS enqueue = %hu,\n"
+                                                 "             QoS dequeue = %hu, Worker write = %hu\n",
+               burst_conf.rx_burst, burst_conf.ring_burst, burst_conf.ring_burst, 
+               burst_conf.qos_dequeue, burst_conf.tx_burst);
+
+       RTE_LOG(INFO, APP, "NIC thresholds RX (p = %hhu, h = %hhu, w = %hhu),"
+                                "TX (p = %hhu, h = %hhu, w = %hhu)\n",
+               rx_thresh.pthresh, rx_thresh.hthresh, rx_thresh.wthresh,
+               tx_thresh.pthresh, tx_thresh.hthresh, tx_thresh.wthresh);
+
+       return 0;
+}
diff --git a/examples/qos_sched/main.c b/examples/qos_sched/main.c
new file mode 100755 (executable)
index 0000000..b6cbe35
--- /dev/null
@@ -0,0 +1,246 @@
+/*-
+ *   BSD LICENSE
+ * 
+ *   Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ * 
+ *   Redistribution and use in source and binary forms, with or without 
+ *   modification, are permitted provided that the following conditions 
+ *   are met:
+ * 
+ *     * Redistributions of source code must retain the above copyright 
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright 
+ *       notice, this list of conditions and the following disclaimer in 
+ *       the documentation and/or other materials provided with the 
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its 
+ *       contributors may be used to endorse or promote products derived 
+ *       from this software without specific prior written permission.
+ * 
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * 
+ */
+
+#include <unistd.h>
+#include <stdint.h>
+
+#include <rte_log.h>
+#include <rte_mbuf.h>
+#include <rte_malloc.h>
+#include <rte_cycles.h>
+#include <rte_ethdev.h>
+#include <rte_memcpy.h>
+#include <rte_byteorder.h>
+#include <rte_branch_prediction.h>
+
+#include <rte_sched.h>
+
+#include "main.h"
+
+#define APP_MODE_NONE 0
+#define APP_RX_MODE   1
+#define APP_WT_MODE   2
+#define APP_TX_MODE   4
+
+
+/*
+ * Per-lcore main processing loop, launched on every worker lcore via
+ * rte_eal_mp_remote_launch() from MAIN().
+ *
+ * Scans the global qos_conf[] flow table and collects every RX, worker
+ * (QoS enqueue/dequeue) and TX role assigned to the calling lcore, then
+ * enters the processing loop matching the collected role mask:
+ *   RX only          -> app_rx_thread()
+ *   TX + worker      -> app_mixed_thread()
+ *   TX only          -> app_tx_thread()
+ *   worker only      -> app_worker_thread()
+ *
+ * Returns -1 if the lcore has no role, or if it was configured for the
+ * unsupported RX+worker combination; the thread loops normally do not
+ * return.
+ */
+static int
+app_main_loop(__attribute__((unused))void *dummy)
+{
+       uint32_t lcore_id;
+       uint32_t i, mode;
+       uint32_t rx_idx = 0;
+       uint32_t wt_idx = 0;
+       uint32_t tx_idx = 0;
+       struct thread_conf *rx_confs[MAX_DATA_STREAMS];
+       struct thread_conf *wt_confs[MAX_DATA_STREAMS];
+       struct thread_conf *tx_confs[MAX_DATA_STREAMS];
+
+       memset(rx_confs, 0, sizeof(rx_confs));
+       memset(wt_confs, 0, sizeof(wt_confs));
+       memset(tx_confs, 0, sizeof(tx_confs));
+
+
+       mode = APP_MODE_NONE;
+       lcore_id = rte_lcore_id();
+
+       /* Collect every role this lcore plays across all configured flows */
+       for (i = 0; i < nb_pfc; i++) {
+               struct flow_conf *flow = &qos_conf[i];
+
+               if (flow->rx_core == lcore_id) {
+                       flow->rx_thread.rx_port = flow->rx_port;
+                       flow->rx_thread.rx_ring =  flow->rx_ring;
+                       flow->rx_thread.rx_queue = flow->rx_queue;
+
+                       rx_confs[rx_idx++] = &flow->rx_thread;
+
+                       mode |= APP_RX_MODE;
+               }
+               if (flow->tx_core == lcore_id) {
+                       flow->tx_thread.tx_port = flow->tx_port;
+                       flow->tx_thread.tx_ring =  flow->tx_ring;
+                       flow->tx_thread.tx_queue = flow->tx_queue;
+
+                       tx_confs[tx_idx++] = &flow->tx_thread;
+
+                       mode |= APP_TX_MODE;
+               }
+               if (flow->wt_core == lcore_id) {
+                       flow->wt_thread.rx_ring =  flow->rx_ring;
+                       flow->wt_thread.tx_ring =  flow->tx_ring;
+                       flow->wt_thread.tx_port =  flow->tx_port;
+                       flow->wt_thread.sched_port =  flow->sched_port;
+
+                       wt_confs[wt_idx++] = &flow->wt_thread;
+
+                       mode |= APP_WT_MODE;
+               }
+       }
+
+       if (mode == APP_MODE_NONE) {
+               RTE_LOG(INFO, APP, "lcore %u has nothing to do\n", lcore_id);
+               return -1;
+       }
+
+       /* RX and worker on the same lcore is not a supported combination */
+       if (mode == (APP_RX_MODE | APP_WT_MODE)) {
+               RTE_LOG(INFO, APP, "lcore %u was configured for both RX and WT !!!\n",
+                                lcore_id);
+               return -1;
+       }
+
+       RTE_LOG(INFO, APP, "entering main loop on lcore %u\n", lcore_id);
+       if (mode == APP_RX_MODE) {
+               for (i = 0; i < rx_idx; i++) {
+                       /* ports are uint8_t: print via %u after default promotion */
+                       RTE_LOG(INFO, APP, "flow %u lcoreid %u reading port %u\n",
+                               i, lcore_id, (unsigned) rx_confs[i]->rx_port);
+               }
+
+               app_rx_thread(rx_confs);
+       }
+       else if (mode == (APP_TX_MODE | APP_WT_MODE)) {
+               /* allocate the TX staging table used by the mixed thread */
+               for (i = 0; i < wt_idx; i++) {
+                       wt_confs[i]->m_table = rte_malloc("table_wt", sizeof(struct rte_mbuf *)
+                                       * burst_conf.tx_burst, CACHE_LINE_SIZE);
+
+                       if (wt_confs[i]->m_table == NULL)
+                               rte_panic("flow %u unable to allocate memory buffer\n", i);
+
+                       RTE_LOG(INFO, APP, "flow %u lcoreid %u sched+write port %u\n",
+                               i, lcore_id, (unsigned) wt_confs[i]->tx_port);
+               }
+
+               app_mixed_thread(wt_confs);
+       }
+       else if (mode == APP_TX_MODE) {
+               /* allocate the TX staging table used by the TX-only thread */
+               for (i = 0; i < tx_idx; i++) {
+                       tx_confs[i]->m_table = rte_malloc("table_tx", sizeof(struct rte_mbuf *)
+                                       * burst_conf.tx_burst, CACHE_LINE_SIZE);
+
+                       if (tx_confs[i]->m_table == NULL)
+                               rte_panic("flow %u unable to allocate memory buffer\n", i);
+
+                       RTE_LOG(INFO, APP, "flow %u lcoreid %u writing port %u\n",
+                               i, lcore_id, (unsigned) tx_confs[i]->tx_port);
+               }
+
+               app_tx_thread(tx_confs);
+       }
+       else if (mode == APP_WT_MODE){
+               for (i = 0; i < wt_idx; i++) {
+                       RTE_LOG(INFO, APP, "flow %u lcoreid %u scheduling \n", i, lcore_id);
+               }
+
+               app_worker_thread(wt_confs);
+       }
+
+       return 0;
+}
+
+static void
+app_stat(void)
+{
+       uint32_t i;
+       struct rte_eth_stats stats;
+       static struct rte_eth_stats rx_stats[MAX_DATA_STREAMS];
+       static struct rte_eth_stats tx_stats[MAX_DATA_STREAMS];
+
+       /* print statistics */
+       for(i = 0; i < nb_pfc; i++) {
+               struct flow_conf *flow = &qos_conf[i];
+
+               rte_eth_stats_get(flow->rx_port, &stats);
+               printf("\nRX port %hu: rx: %"PRIu64 " err: %"PRIu64 " no_mbuf: %"PRIu64 "\n",
+                       flow->rx_port,
+                       stats.ipackets - rx_stats[i].ipackets,
+                       stats.ierrors - rx_stats[i].ierrors,
+                       stats.rx_nombuf - rx_stats[i].rx_nombuf);
+               memcpy(&rx_stats[i], &stats, sizeof(stats));
+
+               rte_eth_stats_get(flow->tx_port, &stats);
+               printf("TX port %hu: tx: %" PRIu64 " err: %" PRIu64 "\n",
+                       flow->tx_port,
+                       stats.opackets - tx_stats[i].opackets,
+                       stats.oerrors - tx_stats[i].oerrors);
+               memcpy(&tx_stats[i], &stats, sizeof(stats));
+
+               //printf("MP = %d\n", rte_mempool_count(conf->app_pktmbuf_pool));
+
+#if APP_COLLECT_STAT
+               printf("-------+------------+------------+\n");
+               printf("       |  received  |   dropped  |\n");
+               printf("-------+------------+------------+\n");
+               printf("  RX   | %10" PRIu64 " | %10" PRIu64 " |\n",
+                       flow->rx_thread.stat.nb_rx,
+                       flow->rx_thread.stat.nb_drop);
+               printf("QOS+TX | %10" PRIu64 " | %10" PRIu64 " |   pps: %"PRIu64 " \n",
+                       flow->wt_thread.stat.nb_rx,
+                       flow->wt_thread.stat.nb_drop,
+                       flow->wt_thread.stat.nb_rx - flow->wt_thread.stat.nb_drop);
+               printf("-------+------------+------------+\n");
+
+               memset(&flow->rx_thread.stat, 0, sizeof(struct thread_stat));
+               memset(&flow->wt_thread.stat, 0, sizeof(struct thread_stat));
+#endif
+       }
+}
+
+
+
+/*
+ * Application entry point (MAIN expands to main() -- see main.h).
+ *
+ * Parses the command line and QoS profile, initializes the application
+ * (ports, rings, scheduler -- see app_init()), launches app_main_loop()
+ * on every worker lcore, then loops forever on the master lcore
+ * printing statistics once per second.  Only returns (with -1) on
+ * argument-parsing or initialization failure.
+ */
+int
+MAIN(int argc, char **argv)
+{
+       int ret;
+
+       ret = app_parse_args(argc, argv);
+       if (ret < 0)
+               return -1;
+
+       ret = app_init();
+       if (ret < 0)
+               return -1;
+
+
+       /* launch per-lcore init on every lcore */
+       rte_eal_mp_remote_launch(app_main_loop, NULL, SKIP_MASTER);
+       
+       /* print statistics every second; never returns from here */
+       while(1) {
+               sleep(1);
+               app_stat();
+       }
+}
+
+
+
diff --git a/examples/qos_sched/main.h b/examples/qos_sched/main.h
new file mode 100755 (executable)
index 0000000..243064c
--- /dev/null
@@ -0,0 +1,186 @@
+/*-
+ *   BSD LICENSE
+ * 
+ *   Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ * 
+ *   Redistribution and use in source and binary forms, with or without 
+ *   modification, are permitted provided that the following conditions 
+ *   are met:
+ * 
+ *     * Redistributions of source code must retain the above copyright 
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright 
+ *       notice, this list of conditions and the following disclaimer in 
+ *       the documentation and/or other materials provided with the 
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its 
+ *       contributors may be used to endorse or promote products derived 
+ *       from this software without specific prior written permission.
+ * 
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * 
+ */
+
+#ifndef _MAIN_H_
+#define _MAIN_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_sched.h>
+
+#ifdef RTE_EXEC_ENV_BAREMETAL
+#error "Baremetal is not supported"
+#else
+#define MAIN main
+#endif
+
+#define RTE_LOGTYPE_APP RTE_LOGTYPE_USER1
+
+/*
+ * Configurable number of RX/TX ring descriptors
+ */
+#define APP_RX_DESC_DEFAULT 128
+#define APP_TX_DESC_DEFAULT 256
+
+#define MBUF_SIZE (1528 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)
+#define APP_RING_SIZE (8*1024)
+#define NB_MBUF   (64*1024*32)
+
+#define MAX_PKT_RX_BURST 64
+#define PKT_ENQUEUE 64
+#define PKT_DEQUEUE 32
+#define MAX_PKT_TX_BURST 64
+
+#define RX_PTHRESH 8 /**< Default values of RX prefetch threshold reg. */
+#define RX_HTHRESH 8 /**< Default values of RX host threshold reg. */
+#define RX_WTHRESH 4 /**< Default values of RX write-back threshold reg. */
+
+#define TX_PTHRESH 36 /**< Default values of TX prefetch threshold reg. */
+#define TX_HTHRESH 0  /**< Default values of TX host threshold reg. */
+#define TX_WTHRESH 0  /**< Default values of TX write-back threshold reg. */
+
+#define BURST_TX_DRAIN_US 100
+
+#define MAX_DATA_STREAMS (RTE_MAX_LCORE/2)
+#define MAX_SCHED_SUBPORTS             8
+#define MAX_SCHED_PIPES                4096
+
+#ifndef APP_COLLECT_STAT
+#define APP_COLLECT_STAT               1
+#endif
+
+#if APP_COLLECT_STAT
+#define APP_STATS_ADD(stat,val) (stat) += (val)
+#else
+#define APP_STATS_ADD(stat,val) do {(void) (val);} while (0)
+#endif
+
+/* Per-thread software packet counters (used when APP_COLLECT_STAT;
+ * printed and reset once per second by app_stat()). */
+struct thread_stat
+{
+       uint64_t nb_rx;   /* packets handled by the thread */
+       uint64_t nb_drop; /* packets the thread dropped */
+};
+
+
+/*
+ * Per-role runtime state handed to the RX / worker / TX loops; only the
+ * fields relevant to the owning role are filled in by app_main_loop().
+ */
+struct thread_conf
+{
+       uint32_t counter;          /* loop-private bookkeeping (see app_thread.c) */
+       uint32_t n_mbufs;          /* mbufs currently staged in m_table -- see app_thread.c */
+       struct rte_mbuf **m_table; /* TX staging table, rte_malloc'd in app_main_loop() */
+
+       uint8_t rx_port;
+       uint8_t tx_port;
+       uint16_t rx_queue;
+       uint16_t tx_queue;
+       struct rte_ring *rx_ring;  /* RX stage -> worker stage ring */
+       struct rte_ring *tx_ring;  /* worker stage -> TX stage ring */
+       struct rte_sched_port *sched_port; /* QoS scheduler instance (worker role) */
+
+#if APP_COLLECT_STAT
+       struct thread_stat stat;   /* per-interval counters, reset by app_stat() */
+#endif
+} __rte_cache_aligned;
+
+
+/*
+ * Complete configuration of one packet flow: the lcores assigned to its
+ * RX, worker and TX stages, the NIC ports/queues it uses, the software
+ * rings and scheduler connecting the stages, plus the embedded per-stage
+ * thread_conf state that app_main_loop() hands to each stage's loop.
+ */
+struct flow_conf
+{
+       uint32_t rx_core; /* lcore running the RX stage */
+       uint32_t wt_core; /* lcore running the worker (QoS) stage */
+       uint32_t tx_core; /* lcore running the TX stage */
+       uint8_t rx_port;
+       uint8_t tx_port;
+       uint16_t rx_queue;
+       uint16_t tx_queue;
+       struct rte_ring *rx_ring; /* RX -> worker ring */
+       struct rte_ring *tx_ring; /* worker -> TX ring */
+       struct rte_sched_port *sched_port; /* QoS scheduler for this flow */
+       struct rte_mempool *mbuf_pool;     /* mbuf pool (created in app_init()) */
+
+       struct thread_conf rx_thread; /* state for the RX loop */
+       struct thread_conf wt_thread; /* state for the worker loop */
+       struct thread_conf tx_thread; /* state for the TX loop */
+};
+
+
+/* Sizes (in entries) of the NIC descriptor rings and the SW rings
+ * between stages (logged by app_init()). */
+struct ring_conf
+{
+       uint32_t rx_size;   /* NIC RX descriptor ring size */
+       uint32_t ring_size; /* software ring size between stages */
+       uint32_t tx_size;   /* NIC TX descriptor ring size */
+};
+
+/* Per-stage burst sizes (see the "Burst sizes" log in app_init()). */
+struct burst_conf
+{
+       uint16_t rx_burst;    /* NIC RX read burst */
+       uint16_t ring_burst;  /* SW ring read/write burst */
+       uint16_t qos_dequeue; /* QoS scheduler dequeue burst */
+       uint16_t tx_burst;    /* NIC TX write burst */
+};
+
+/* NIC ring threshold register values (prefetch / host / write-back),
+ * applied to RX and TX queues at port init. */
+struct ring_thresh
+{
+       uint8_t pthresh; /**< Ring prefetch threshold. */
+       uint8_t hthresh; /**< Ring host threshold. */
+       uint8_t wthresh; /**< Ring writeback threshold. */
+};
+
+extern uint32_t nb_pfc;
+extern const char *cfg_profile;
+extern struct flow_conf qos_conf[];
+extern int app_pipe_to_profile[MAX_SCHED_SUBPORTS][MAX_SCHED_PIPES];
+
+extern struct ring_conf ring_conf;
+extern struct burst_conf burst_conf;
+extern struct ring_thresh rx_thresh;
+extern struct ring_thresh tx_thresh;
+
+extern struct rte_sched_port_params port_params;
+
+int MAIN(int argc, char **argv);
+int app_parse_args(int argc, char **argv);
+int app_init(void);
+
+void app_rx_thread(struct thread_conf **qconf);
+void app_tx_thread(struct thread_conf **qconf);
+void app_worker_thread(struct thread_conf **qconf);
+void app_mixed_thread(struct thread_conf **qconf);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _MAIN_H_ */
diff --git a/examples/qos_sched/profile.cfg b/examples/qos_sched/profile.cfg
new file mode 100644 (file)
index 0000000..5caa996
--- /dev/null
@@ -0,0 +1,109 @@
+;   BSD LICENSE
+; 
+;   Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+;   All rights reserved.
+; 
+;   Redistribution and use in source and binary forms, with or without 
+;   modification, are permitted provided that the following conditions 
+;   are met:
+; 
+;     * Redistributions of source code must retain the above copyright 
+;       notice, this list of conditions and the following disclaimer.
+;     * Redistributions in binary form must reproduce the above copyright 
+;       notice, this list of conditions and the following disclaimer in 
+;       the documentation and/or other materials provided with the 
+;       distribution.
+;     * Neither the name of Intel Corporation nor the names of its 
+;       contributors may be used to endorse or promote products derived 
+;       from this software without specific prior written permission.
+; 
+;   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+;   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+;   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
+;   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
+;   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
+;   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
+;   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
+;   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
+;   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
+;   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
+;   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+; 
+
+; This file enables the following hierarchical scheduler configuration for each
+; 10GbE output port:
+;      * Single subport (subport 0): 
+;              - Subport rate set to 100% of port rate
+;              - Each of the 4 traffic classes has rate set to 100% of port rate
+;      * 4K pipes per subport 0 (pipes 0 .. 4095) with identical configuration:
+;              - Pipe rate set to 1/4K of port rate
+;              - Each of the 4 traffic classes has rate set to 100% of pipe rate
+;              - Within each traffic class, the byte-level WRR weights for the 4 queues
+;         are set to 1:1:1:1
+;
+; For more details, please refer to chapter "Quality of Service (QoS) Framework"
+; of Intel Data Plane Development Kit (Intel DPDK) Programmer's Guide.
+; Port configuration
+[port]
+frame overhead = 24
+number of subports per port = 1
+number of pipes per subport = 4096
+queue sizes = 64 64 64 64
+
+; Subport configuration
+[subport 0]
+tb rate = 1250000000           ; Bytes per second
+tb size = 1000000              ; Bytes
+
+tc 0 rate = 1250000000         ; Bytes per second
+tc 1 rate = 1250000000         ; Bytes per second
+tc 2 rate = 1250000000         ; Bytes per second
+tc 3 rate = 1250000000         ; Bytes per second
+tc period = 10                 ; Milliseconds
+tc oversubscription period = 10 ; Milliseconds
+
+pipe 0-4095 = 0                ; These pipes are configured with pipe profile 0
+
+; Pipe configuration
+[pipe profile 0]
+tb rate = 305175               ; Bytes per second
+tb size = 1000000              ; Bytes
+
+tc 0 rate = 305175             ; Bytes per second
+tc 1 rate = 305175             ; Bytes per second
+tc 2 rate = 305175             ; Bytes per second
+tc 3 rate = 305175             ; Bytes per second
+tc period = 40                 ; Milliseconds
+
+tc 0 oversubscription weight = 1
+tc 1 oversubscription weight = 1
+tc 2 oversubscription weight = 1
+tc 3 oversubscription weight = 1
+
+tc 0 wrr weights = 1 1 1 1
+tc 1 wrr weights = 1 1 1 1
+tc 2 wrr weights = 1 1 1 1
+tc 3 wrr weights = 1 1 1 1
+
+; RED params per traffic class and color (Green / Yellow / Red)
+[red]
+tc 0 wred min = 48 40 32
+tc 0 wred max = 64 64 64
+tc 0 wred inv prob = 10 10 10
+tc 0 wred weight = 9 9 9
+
+tc 1 wred min = 48 40 32
+tc 1 wred max = 64 64 64
+tc 1 wred inv prob = 10 10 10
+tc 1 wred weight = 9 9 9
+
+tc 2 wred min = 48 40 32
+tc 2 wred max = 64 64 64
+tc 2 wred inv prob = 10 10 10
+tc 2 wred weight = 9 9 9
+
+tc 3 wred min = 48 40 32
+tc 3 wred max = 64 64 64
+tc 3 wred inv prob = 10 10 10
+tc 3 wred weight = 9 9 9
index 122ba42..74162f2 100644 (file)
@@ -48,6 +48,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_LPM) += librte_lpm
 DIRS-$(CONFIG_RTE_LIBRTE_NET) += librte_net
 DIRS-$(CONFIG_RTE_LIBRTE_POWER) += librte_power
 DIRS-$(CONFIG_RTE_LIBRTE_METER) += librte_meter
+DIRS-$(CONFIG_RTE_LIBRTE_SCHED) += librte_sched
 DIRS-$(CONFIG_RTE_LIBRTE_PMAC) += librte_pmac
 
 ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
index b0c735f..bf4a3e2 100644 (file)
@@ -74,6 +74,7 @@ extern struct rte_logs rte_logs;
 #define RTE_LOGTYPE_PMAC    0x00000200 /**< Log related to PMAC. */
 #define RTE_LOGTYPE_POWER   0x00000400 /**< Log related to power. */
 #define RTE_LOGTYPE_METER   0x00000800 /**< Log related to QoS meter. */
+#define RTE_LOGTYPE_SCHED   0x00001000 /**< Log related to QoS port scheduler. */
 
 /* these log types can be used in an application */
 #define RTE_LOGTYPE_USER1   0x01000000 /**< User-defined log type 1. */
index 27988c3..5d610cb 100644 (file)
@@ -158,6 +158,7 @@ struct rte_pktmbuf {
                        uint16_t hash;
                        uint16_t id;
                } fdir;             /**< Filter identifier if FDIR enabled */
+               uint32_t sched;     /**< Hierarchical scheduler */
        } hash;                 /**< hash information */
 };
 
diff --git a/lib/librte_sched/Makefile b/lib/librte_sched/Makefile
new file mode 100644 (file)
index 0000000..5050db0
--- /dev/null
@@ -0,0 +1,56 @@
+#   BSD LICENSE
+# 
+#   Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+#   All rights reserved.
+# 
+#   Redistribution and use in source and binary forms, with or without 
+#   modification, are permitted provided that the following conditions 
+#   are met:
+# 
+#     * Redistributions of source code must retain the above copyright 
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright 
+#       notice, this list of conditions and the following disclaimer in 
+#       the documentation and/or other materials provided with the 
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its 
+#       contributors may be used to endorse or promote products derived 
+#       from this software without specific prior written permission.
+# 
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# 
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# library name
+#
+LIB = librte_sched.a
+
+CFLAGS += -O3
+CFLAGS += -g
+CFLAGS += $(WERROR_FLAGS)
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-$(CONFIG_RTE_LIBRTE_SCHED) += rte_sched.c rte_red.c rte_approx.c
+
+# install includes
+SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include := rte_sched.h rte_bitmap.h rte_sched_common.h rte_red.h rte_approx.h
+
+# this lib depends upon:
+DEPDIRS-$(CONFIG_RTE_LIBRTE_SCHED) += lib/librte_mempool lib/librte_mbuf
+DEPDIRS-$(CONFIG_RTE_LIBRTE_SCHED) += lib/librte_net lib/librte_timer
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_sched/rte_approx.c b/lib/librte_sched/rte_approx.c
new file mode 100644 (file)
index 0000000..c05e2a7
--- /dev/null
@@ -0,0 +1,197 @@
+/*-
+ *   BSD LICENSE
+ * 
+ *   Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ * 
+ *   Redistribution and use in source and binary forms, with or without 
+ *   modification, are permitted provided that the following conditions 
+ *   are met:
+ * 
+ *     * Redistributions of source code must retain the above copyright 
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright 
+ *       notice, this list of conditions and the following disclaimer in 
+ *       the documentation and/or other materials provided with the 
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its 
+ *       contributors may be used to endorse or promote products derived 
+ *       from this software without specific prior written permission.
+ * 
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * 
+ */
+
+#include <stdlib.h>
+
+#include "rte_approx.h"
+
+/* 
+ * Based on paper "Approximating Rational Numbers by Fractions" by Michal 
+ * Forisek forisek@dcs.fmph.uniba.sk
+ *
+ * Given a rational number alpha with 0 < alpha < 1 and a precision d, the goal
+ * is to find positive integers p, q such that alpha - d < p/q < alpha + d, and
+ * q is minimal.
+ *
+ * http://people.ksp.sk/~misof/publications/2007approx.pdf
+ */
+
+/* fraction comparison: (a/b) < (c/d)?
+ * Cross-multiply in 64 bits: with 32-bit operands the products a*d and
+ * b*c can exceed UINT32_MAX and wrap (unsigned wraparound, CERT INT30-C),
+ * silently yielding a wrong comparison result. */
+static inline uint32_t
+less(uint32_t a, uint32_t b, uint32_t c, uint32_t d)
+{
+       return ((uint64_t)a * d < (uint64_t)b * c);
+}
+
+/* fraction comparison: (a/b) <= (c/d)?
+ * 64-bit products avoid uint32 wraparound in the cross-multiplication. */
+static inline uint32_t
+less_or_equal(uint32_t a, uint32_t b, uint32_t c, uint32_t d)
+{
+       return ((uint64_t)a * d <= (uint64_t)b * c);
+}
+
+/* check whether a/b is a valid approximation, i.e. whether
+ * (alpha_num - d_num)/denum < a/b < (alpha_num + d_num)/denum.
+ * Caller guarantees d_num < alpha_num, so alpha_num - d_num cannot
+ * underflow (checked in find_best_rational_approximation()). */
+static inline uint32_t 
+matches(uint32_t a, uint32_t b, 
+       uint32_t alpha_num, uint32_t d_num, uint32_t denum)
+{
+       /* a/b at or below the lower bound: reject */
+       if (less_or_equal(a, b, alpha_num - d_num, denum))
+               return 0;
+
+       /* strictly inside the interval: accept */
+       if (less(a ,b, alpha_num + d_num, denum))
+               return 1;
+       
+       return 0;
+}
+
+/* Given the bracketing fractions p_a/q_a and p_b/q_b, find the smallest
+ * k for which the mediant (p_b + k*p_a)/(q_b + k*q_a) falls below the
+ * upper bound (alpha + d), and return it via *p, *q.
+ * NOTE(review): assumes the caller already verified (via matches()) that
+ * a solution exists on this side, so k_denum is non-zero -- confirm. */
+static inline void 
+find_exact_solution_left(uint32_t p_a, uint32_t q_a, uint32_t p_b, uint32_t q_b, 
+       uint32_t alpha_num, uint32_t d_num, uint32_t denum, uint32_t *p, uint32_t *q)
+{
+       uint32_t k_num = denum * p_b - (alpha_num + d_num) * q_b;
+       uint32_t k_denum = (alpha_num + d_num) * q_a - denum * p_a;
+       /* smallest integer k strictly satisfying the bound */
+       uint32_t k = (k_num / k_denum) + 1;
+       
+       *p = p_b + k * p_a;
+       *q = q_b + k * q_a;
+}
+
+/* Mirror of find_exact_solution_left(): find the smallest k for which
+ * the mediant (p_b + k*p_a)/(q_b + k*q_a) rises above the lower bound
+ * (alpha - d), and return it via *p, *q.
+ * NOTE(review): same precondition -- caller verified a solution exists. */
+static inline void
+find_exact_solution_right(uint32_t p_a, uint32_t q_a, uint32_t p_b, uint32_t q_b,
+       uint32_t alpha_num, uint32_t d_num, uint32_t denum, uint32_t *p, uint32_t *q) 
+{
+       uint32_t k_num = - denum * p_b + (alpha_num - d_num) * q_b;
+       uint32_t k_denum = - (alpha_num - d_num) * q_a + denum * p_a;
+       /* smallest integer k strictly satisfying the bound */
+       uint32_t k = (k_num / k_denum) + 1;
+       
+       *p = p_b + k * p_a;
+       *q = q_b + k * q_a;
+}
+
+/* Core search from the Forisek paper (see file header): walk the
+ * Stern-Brocot-style interval [p_a/q_a, p_b/q_b] bracketing alpha,
+ * alternating batched steps to the left and to the right, until a
+ * fraction inside (alpha - d, alpha + d) is found.  All rationals are
+ * expressed over the common denominator denum.
+ * Returns 0 with *p, *q set, or -1 if the input preconditions fail.
+ * NOTE(review): termination relies on the precondition check below;
+ * intermediate products are 32-bit and may wrap for large denum. */
+static int 
+find_best_rational_approximation(uint32_t alpha_num, uint32_t d_num, uint32_t denum, uint32_t *p, uint32_t *q)
+{
+       uint32_t p_a, q_a, p_b, q_b;
+       
+       /* check assumptions on the inputs */
+       if (!((0 < d_num) && (d_num < alpha_num) && (alpha_num < denum) && (d_num + alpha_num < denum))) {
+               return -1;
+       }
+       
+       /* set initial bounds for the search: [0/1, 1/1] brackets alpha */
+       p_a = 0;
+       q_a = 1;
+       p_b = 1;
+       q_b = 1;
+
+       while (1) {
+               uint32_t new_p_a, new_q_a, new_p_b, new_q_b;
+               uint32_t x_num, x_denum, x;
+               int aa, bb;
+               
+               /* compute the number of steps to the left */
+               x_num = denum * p_b - alpha_num * q_b;
+               x_denum = - denum * p_a + alpha_num * q_a;
+               x = (x_num + x_denum - 1) / x_denum; /* x = ceil(x_num / x_denum) */
+               
+               /* check whether we have a valid approximation */
+               aa = matches(p_b + x * p_a, q_b + x * q_a, alpha_num, d_num, denum);
+               bb = matches(p_b + (x-1) * p_a, q_b + (x - 1) * q_a, alpha_num, d_num, denum);
+               if (aa || bb) {
+                       find_exact_solution_left(p_a, q_a, p_b, q_b, alpha_num, d_num, denum, p, q);
+                       return 0;
+               }
+               
+               /* update the interval (old bound becomes one mediant step back) */
+               new_p_a = p_b + (x - 1) * p_a ;
+               new_q_a = q_b + (x - 1) * q_a;
+               new_p_b = p_b + x * p_a ;
+               new_q_b = q_b + x * q_a;
+
+               p_a = new_p_a ;
+               q_a = new_q_a;
+               p_b = new_p_b ;
+               q_b = new_q_b;
+
+               /* compute the number of steps to the right */
+               x_num = alpha_num * q_b - denum * p_b;
+               x_denum = - alpha_num * q_a + denum * p_a;
+               x = (x_num + x_denum - 1) / x_denum; /* x = ceil(x_num / x_denum) */
+
+               /* check whether we have a valid approximation */
+               aa = matches(p_b + x * p_a, q_b + x * q_a, alpha_num, d_num, denum);
+               bb = matches(p_b + (x - 1) * p_a, q_b + (x - 1) * q_a, alpha_num, d_num, denum);
+               if (aa || bb) {
+                       find_exact_solution_right(p_a, q_a, p_b, q_b, alpha_num, d_num, denum, p, q);
+                       return 0;
+                }
+                
+               /* update the interval */
+               new_p_a = p_b + (x - 1) * p_a;
+               new_q_a = q_b + (x - 1) * q_a;
+               new_p_b = p_b + x * p_a;
+               new_q_b = q_b + x * q_a;
+               
+               p_a = new_p_a;
+               q_a = new_q_a;
+               p_b = new_p_b;
+               q_b = new_q_b;
+       }
+}
+
+/**
+ * Find positive integers p, q such that alpha - d < p/q < alpha + d.
+ *
+ * Returns 0 on success with *p and *q set; -1 if the inputs violate
+ * 0 < d < alpha < 1; -2 if p or q is NULL (or -1 from the search if the
+ * scaled integer preconditions fail).
+ *
+ * NOTE(review): the scaling loop multiplies denum by 10 until d >= 1;
+ * for extremely small d the uint32 denum can overflow -- confirm callers
+ * only request reasonable precisions.
+ */
+int rte_approx(double alpha, double d, uint32_t *p, uint32_t *q)
+{
+       uint32_t alpha_num, d_num, denum;
+       
+       /* Check input arguments */
+       if (!((0.0 < d) && (d < alpha) && (alpha < 1.0))) {
+               return -1;
+       }
+       
+       if ((p == NULL) || (q == NULL)) {
+               return -2;
+       }
+       
+       /* Compute alpha_num, d_num and denum: scale alpha and d by powers
+        * of 10 until both are integral when truncated */
+       denum = 1;
+       while (d < 1) {
+               alpha *= 10;
+               d *= 10;
+               denum *= 10;
+       }
+       alpha_num = (uint32_t) alpha;
+       d_num = (uint32_t) d;
+       
+       /* Perform approximation */
+       return find_best_rational_approximation(alpha_num, d_num, denum, p, q); 
+}
diff --git a/lib/librte_sched/rte_approx.h b/lib/librte_sched/rte_approx.h
new file mode 100644 (file)
index 0000000..d755afa
--- /dev/null
@@ -0,0 +1,76 @@
+/*-
+ *   BSD LICENSE
+ * 
+ *   Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ * 
+ *   Redistribution and use in source and binary forms, with or without 
+ *   modification, are permitted provided that the following conditions 
+ *   are met:
+ * 
+ *     * Redistributions of source code must retain the above copyright 
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright 
+ *       notice, this list of conditions and the following disclaimer in 
+ *       the documentation and/or other materials provided with the 
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its 
+ *       contributors may be used to endorse or promote products derived 
+ *       from this software without specific prior written permission.
+ * 
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * 
+ */
+
+#ifndef __INCLUDE_RTE_APPROX_H__
+#define __INCLUDE_RTE_APPROX_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Rational Approximation
+ *
+ * Given a rational number alpha with 0 < alpha < 1 and a precision d, the goal
+ * is to find positive integers p, q such that alpha - d < p/q < alpha + d, and
+ * q is minimal.
+ * 
+ ***/
+
+#include <stdint.h>
+
+/**
+ * Find best rational approximation
+ *
+ * @param alpha
+ *   Rational number to approximate
+ * @param d
+ *   Precision for the rational approximation
+ * @param p
+ *   Pointer to pre-allocated space where the numerator of the rational 
+ *   approximation will be stored when operation is successful
+ * @param q
+ *   Pointer to pre-allocated space where the denominator of the rational
+ *   approximation will be stored when operation is successful
+ * @return
+ *   0 upon success, error code otherwise
+ */
+int rte_approx(double alpha, double d, uint32_t *p, uint32_t *q);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __INCLUDE_RTE_APPROX_H__ */
diff --git a/lib/librte_sched/rte_bitmap.h b/lib/librte_sched/rte_bitmap.h
new file mode 100644 (file)
index 0000000..c52db32
--- /dev/null
@@ -0,0 +1,505 @@
+/*-
+ *   BSD LICENSE
+ * 
+ *   Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ * 
+ *   Redistribution and use in source and binary forms, with or without 
+ *   modification, are permitted provided that the following conditions 
+ *   are met:
+ * 
+ *     * Redistributions of source code must retain the above copyright 
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright 
+ *       notice, this list of conditions and the following disclaimer in 
+ *       the documentation and/or other materials provided with the 
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its 
+ *       contributors may be used to endorse or promote products derived 
+ *       from this software without specific prior written permission.
+ * 
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * 
+ */
+
+#ifndef __INCLUDE_RTE_BITMAP_H__
+#define __INCLUDE_RTE_BITMAP_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Bitmap
+ *
+ * The bitmap component provides a mechanism to manage large arrays of bits
+ * through bit get/set/clear and bit array scan operations.
+ *
+ * The bitmap scan operation is optimized for 64-bit CPUs using 64-byte cache
+ * lines. The bitmap is hierarchically organized using two arrays (array1 and
+ * array2), with each bit in array1 being associated with a full cache line
+ * (512 bits) of bitmap bits, which are stored in array2: the bit in array1 is
+ * set only when there is at least one bit set within its associated array2
+ * bits, otherwise the bit in array1 is cleared. The read and write operations
+ * for array1 and array2 are always done in slabs of 64 bits.
+ *
+ * This bitmap is not thread safe. For lock free operation on a specific bitmap
+ * instance, a single writer thread performing bit set/clear operations is
+ * allowed, only the writer thread can do bitmap scan operations, while there 
+ * can be several reader threads performing bit get operations in parallel with
+ * the writer thread. When the use of locking primitives is acceptable, the 
+ * serialization of the bit set/clear and bitmap scan operations needs to be
+ * enforced by the caller, while the bit get operation does not require locking
+ * the bitmap.
+ *
+ ***/
+#include <rte_debug.h>
+#include <rte_memory.h>
+#include <rte_branch_prediction.h>
+#include <rte_prefetch.h>
+
+#ifndef RTE_BITMAP_OPTIMIZATIONS
+#define RTE_BITMAP_OPTIMIZATIONS                        1
+#endif
+#if RTE_BITMAP_OPTIMIZATIONS
+#include <tmmintrin.h>
+#endif
+
+/** Number of elements in array1. Each element in array1 is a 64-bit slab. */
+#ifndef RTE_BITMAP_ARRAY1_SIZE
+#define RTE_BITMAP_ARRAY1_SIZE                   16
+#endif
+
+/* Slab */
+#define RTE_BITMAP_SLAB_BIT_SIZE                 64
+#define RTE_BITMAP_SLAB_BIT_SIZE_LOG2            6
+#define RTE_BITMAP_SLAB_BIT_MASK                 (RTE_BITMAP_SLAB_BIT_SIZE - 1)
+
+/* Cache line (CL) */
+#define RTE_BITMAP_CL_BIT_SIZE                   (CACHE_LINE_SIZE * 8)
+#define RTE_BITMAP_CL_BIT_SIZE_LOG2              9
+#define RTE_BITMAP_CL_BIT_MASK                   (RTE_BITMAP_CL_BIT_SIZE - 1)
+
+#define RTE_BITMAP_CL_SLAB_SIZE                  (RTE_BITMAP_CL_BIT_SIZE / RTE_BITMAP_SLAB_BIT_SIZE)
+#define RTE_BITMAP_CL_SLAB_SIZE_LOG2             3
+#define RTE_BITMAP_CL_SLAB_MASK                  (RTE_BITMAP_CL_SLAB_SIZE - 1)
+
+/** Bitmap data structure. array1 is the summary level (one bit per array2
+ * cache line); array2 holds the actual bitmap bits, caller-allocated. */
+struct rte_bitmap {
+       uint64_t array1[RTE_BITMAP_ARRAY1_SIZE]; /**< Bitmap array1 */
+       uint64_t *array2;                        /**< Bitmap array2 */
+       uint32_t array1_size;                    /**< Number of 64-bit slabs in array1 that are actually used */
+       uint32_t array2_size;                    /**< Number of 64-bit slabs in array2 */
+       
+       /* Context for the "scan next" operation */
+       uint32_t index1;  /**< Bitmap scan: Index of current array1 slab */
+       uint32_t offset1; /**< Bitmap scan: Offset of current bit within current array1 slab */
+       uint32_t index2;  /**< Bitmap scan: Index of current array2 slab */
+       uint32_t go2;     /**< Bitmap scan: Go/stop condition for current array2 cache line */
+} __rte_cache_aligned;
+
+static inline void
+__rte_bitmap_index1_inc(struct rte_bitmap *bmp)
+{
+       /* Advance to the next array1 slab with wrap-around; the array size
+        * is a power of two, so masking is equivalent to modulo */
+       uint32_t next = bmp->index1 + 1;
+
+       bmp->index1 = next & (RTE_BITMAP_ARRAY1_SIZE - 1);
+}
+
+static inline uint64_t
+__rte_bitmap_mask1_get(struct rte_bitmap *bmp)
+{
+       /* Mask selecting all bits strictly after the current offset1 bit.
+        * Use a 64-bit constant ("1llu"): on 32-bit targets "1lu" is only
+        * 32 bits wide, which truncates the mask for offset1 >= 32. */
+       return ((~1llu) << bmp->offset1);
+}
+
+static inline void
+__rte_bitmap_index2_set(struct rte_bitmap *bmp)
+{
+       /* Derive the array2 slab index of the cache line selected by the
+        * current array1 position (index1, offset1): each array1 bit maps
+        * to RTE_BITMAP_CL_SLAB_SIZE consecutive array2 slabs */
+       bmp->index2 = (((bmp->index1 << RTE_BITMAP_SLAB_BIT_SIZE_LOG2) + bmp->offset1) << RTE_BITMAP_CL_SLAB_SIZE_LOG2);
+}
+
+#if RTE_BITMAP_OPTIMIZATIONS
+
+/* Bit-scan-forward: store the position of the least significant set bit of
+ * slab in *pos and return 1; return 0 (pos untouched) when slab is zero.
+ * NOTE(review): the likely() hint assumes the common case during scanning
+ * is an all-zero slab — confirm against profiling. */
+static inline int 
+rte_bsf64(uint64_t slab, uint32_t *pos)
+{
+       if (likely(slab == 0)) {
+               return 0;
+       }
+
+       /* Count trailing zeros = index of lowest set bit */
+       *pos = __builtin_ctzll(slab);
+       return 1;
+}
+
+#else
+
+/* Portable fallback bit-scan-forward: store the position of the least
+ * significant set bit of slab in *pos and return 1; return 0 when slab
+ * is zero. */
+static inline int 
+rte_bsf64(uint64_t slab, uint32_t *pos)
+{
+       uint32_t bit;
+
+       if (likely(slab == 0)) {
+               return 0;
+       }
+
+       /* Walk the slab from LSB to MSB, reporting the first set bit */
+       for (bit = 0; bit < RTE_BITMAP_SLAB_BIT_SIZE; bit++) {
+               if (unlikely(slab & (1llu << bit))) {
+                       *pos = bit;
+                       return 1;
+               }
+       }
+
+       return 0;
+}
+
+#endif
+
+static inline void
+__rte_bitmap_scan_init(struct rte_bitmap *bmp)
+{
+       /* Park the scan context at the very last bit position so the first
+        * scan operation wraps around and starts from bit 0 */
+       bmp->index1 = RTE_BITMAP_ARRAY1_SIZE - 1;
+       bmp->offset1 = RTE_BITMAP_SLAB_BIT_SIZE - 1;
+       __rte_bitmap_index2_set(bmp);
+       bmp->index2 += RTE_BITMAP_CL_SLAB_SIZE;
+
+       /* No array2 cache line is currently being read */
+       bmp->go2 = 0;
+}
+
+/**
+ * Bitmap initialization
+ *
+ * @param bmp
+ *   Handle to bitmap instance
+ * @param array2
+ *   Base address of pre-allocated array2. Must be cache-line aligned.
+ * @param n_bits
+ *   Number of pre-allocated bits in array2. Must be non-zero and multiple of 512.
+ * @return
+ *   0 upon success, error code otherwise
+ */
+static inline int 
+rte_bitmap_init(struct rte_bitmap *bmp, uint8_t *array2, uint32_t n_bits)
+{
+       uint32_t array1_size, array2_size;
+
+       /* Check input arguments: array2 must be cache-line aligned and
+        * n_bits a non-zero multiple of the cache line bit size (512) */
+       if ((bmp == NULL) || 
+           (array2 == NULL) || (((uintptr_t) array2) & CACHE_LINE_MASK) ||
+               (n_bits == 0) || (n_bits & RTE_BITMAP_CL_BIT_MASK)){
+               return -1;
+       }
+
+       /* array1 needs one bit per array2 cache line, rounded up to whole
+        * 64-bit slabs; it must fit in the fixed-size embedded array */
+       array2_size = n_bits / RTE_BITMAP_SLAB_BIT_SIZE;
+       array1_size = ((n_bits / RTE_BITMAP_CL_BIT_SIZE) + (RTE_BITMAP_SLAB_BIT_SIZE - 1)) / RTE_BITMAP_SLAB_BIT_SIZE;
+       if (array1_size > RTE_BITMAP_ARRAY1_SIZE){
+               return -1;
+       }
+       
+       /* Setup bitmap. NOTE(review): array2 contents are not zeroed here;
+        * presumably the caller provides zeroed memory or calls
+        * rte_bitmap_reset() — confirm. */
+       memset(bmp, 0, sizeof(struct rte_bitmap));
+       bmp->array2 = (uint64_t *) array2;
+       bmp->array1_size = array1_size;
+       bmp->array2_size = array2_size;
+       __rte_bitmap_scan_init(bmp);
+       
+       return 0;
+}
+
+/**
+ * Bitmap free
+ *
+ * All memory is owned and released by the caller, so this only validates
+ * the handle.
+ *
+ * @param bmp
+ *   Handle to bitmap instance
+ * @return
+ *   0 upon success, error code otherwise
+ */
+static inline int
+rte_bitmap_free(struct rte_bitmap *bmp)
+{
+       return (bmp == NULL) ? -1 : 0;
+}
+
+/**
+ * Bitmap reset
+ *
+ * Clears every bit at both hierarchy levels and rewinds the scan context.
+ *
+ * @param bmp
+ *   Handle to bitmap instance
+ */
+static inline void
+rte_bitmap_reset(struct rte_bitmap *bmp)
+{
+       memset(bmp->array1, 0, sizeof(bmp->array1));
+       memset(bmp->array2, 0, bmp->array2_size * sizeof(uint64_t));
+       __rte_bitmap_scan_init(bmp);
+}
+
+/**
+ * Bitmap location prefetch into CPU L1 cache
+ *
+ * (No return value; the original comment documented a @return for this
+ * void function, which was incorrect.)
+ *
+ * @param bmp
+ *   Handle to bitmap instance
+ * @param pos
+ *   Bit position
+ */
+static inline void
+rte_bitmap_prefetch0(struct rte_bitmap *bmp, uint32_t pos)
+{
+       uint64_t *slab2;
+       uint32_t index2;
+       
+       /* Prefetch the array2 slab (and hence its cache line) holding pos */
+       index2 = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2;
+       slab2 = bmp->array2 + index2;
+       rte_prefetch0((void *) slab2);
+}
+
+/**
+ * Bitmap bit get
+ *
+ * @param bmp
+ *   Handle to bitmap instance
+ * @param pos
+ *   Bit position
+ * @return
+ *   0 when bit is cleared, non-zero when bit is set
+ */
+static inline uint64_t
+rte_bitmap_get(struct rte_bitmap *bmp, uint32_t pos)
+{
+       uint64_t *slab2;
+       uint32_t index2, offset2;
+       
+       index2 = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2;
+       offset2 = pos & RTE_BITMAP_SLAB_BIT_MASK;
+       slab2 = bmp->array2 + index2;
+       /* Use a 64-bit constant ("1llu"): on 32-bit targets "1lu" is only
+        * 32 bits wide, making the shift undefined for offset2 >= 32 */
+       return ((*slab2) & (1llu << offset2));
+}
+
+/**
+ * Bitmap bit set
+ *
+ * Sets the bit in array2 and the corresponding summary bit in array1.
+ *
+ * @param bmp
+ *   Handle to bitmap instance
+ * @param pos
+ *   Bit position
+ */
+static inline void
+rte_bitmap_set(struct rte_bitmap *bmp, uint32_t pos)
+{
+       uint64_t *slab1, *slab2;
+       uint32_t index1, index2, offset1, offset2;
+       
+       /* Set bit in array2 slab and set bit in array1 slab. Use 64-bit
+        * constants ("1llu"): on 32-bit targets "1lu" is only 32 bits wide,
+        * making the shift undefined for offsets >= 32. */
+       index2 = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2;
+       offset2 = pos & RTE_BITMAP_SLAB_BIT_MASK;
+       index1 = pos >> (RTE_BITMAP_SLAB_BIT_SIZE_LOG2 + RTE_BITMAP_CL_BIT_SIZE_LOG2);
+       offset1 = (pos >> RTE_BITMAP_CL_BIT_SIZE_LOG2) & RTE_BITMAP_SLAB_BIT_MASK;
+       slab2 = bmp->array2 + index2;
+       slab1 = bmp->array1 + index1;
+       
+       *slab2 |= 1llu << offset2;
+       *slab1 |= 1llu << offset1;
+}
+
+/**
+ * Bitmap slab set
+ *
+ * ORs a whole 64-bit slab into array2 and sets the corresponding summary
+ * bit in array1.
+ *
+ * @param bmp
+ *   Handle to bitmap instance
+ * @param pos
+ *   Bit position identifying the array2 slab
+ * @param slab
+ *   Value to be assigned to the 64-bit slab in array2
+ */
+static inline void
+rte_bitmap_set_slab(struct rte_bitmap *bmp, uint32_t pos, uint64_t slab)
+{
+       uint64_t *slab1, *slab2;
+       uint32_t index1, index2, offset1;
+       
+       /* Set bits in array2 slab and set bit in array1 slab. Use a 64-bit
+        * constant ("1llu"): on 32-bit targets "1lu" is only 32 bits wide,
+        * making the shift undefined for offset1 >= 32. */
+       index2 = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2;
+       index1 = pos >> (RTE_BITMAP_SLAB_BIT_SIZE_LOG2 + RTE_BITMAP_CL_BIT_SIZE_LOG2);
+       offset1 = (pos >> RTE_BITMAP_CL_BIT_SIZE_LOG2) & RTE_BITMAP_SLAB_BIT_MASK;
+       slab2 = bmp->array2 + index2;
+       slab1 = bmp->array1 + index1;
+       
+       *slab2 |= slab;
+       *slab1 |= 1llu << offset1;
+}
+
+/* OR together all 8 slabs of an array2 cache line; a non-zero result
+ * means at least one bit is set somewhere in the line */
+static inline uint64_t
+__rte_bitmap_line_not_empty(uint64_t *slab2)
+{
+       uint64_t acc01, acc23, acc45, acc67;
+       
+       acc01 = slab2[0] | slab2[1];
+       acc23 = slab2[2] | slab2[3];
+       acc45 = slab2[4] | slab2[5];
+       acc67 = slab2[6] | slab2[7];
+       
+       return ((acc01 | acc23) | (acc45 | acc67));
+}
+
+/**
+ * Bitmap bit clear
+ *
+ * Clears the bit in array2; when the whole array2 cache line becomes
+ * all-zeros, also clears the summary bit in array1.
+ *
+ * @param bmp
+ *   Handle to bitmap instance
+ * @param pos
+ *   Bit position
+ */
+static inline void
+rte_bitmap_clear(struct rte_bitmap *bmp, uint32_t pos)
+{
+       uint64_t *slab1, *slab2;
+       uint32_t index1, index2, offset1, offset2;
+
+       /* Clear bit in array2 slab. Use 64-bit constants ("1llu"): on
+        * 32-bit targets "1lu" is only 32 bits wide, making the shift
+        * undefined for offsets >= 32. */
+       index2 = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2;
+       offset2 = pos & RTE_BITMAP_SLAB_BIT_MASK;
+       slab2 = bmp->array2 + index2;
+       
+       /* Return if array2 slab is not all-zeros */
+       *slab2 &= ~(1llu << offset2);
+       if (*slab2){
+               return;
+       }
+       
+       /* Check the entire cache line of array2 for all-zeros */
+       index2 &= ~ RTE_BITMAP_CL_SLAB_MASK;
+       slab2 = bmp->array2 + index2;
+       if (__rte_bitmap_line_not_empty(slab2)) {
+               return;
+       }
+       
+       /* The array2 cache line is all-zeros, so clear bit in array1 slab */
+       index1 = pos >> (RTE_BITMAP_SLAB_BIT_SIZE_LOG2 + RTE_BITMAP_CL_BIT_SIZE_LOG2);
+       offset1 = (pos >> RTE_BITMAP_CL_BIT_SIZE_LOG2) & RTE_BITMAP_SLAB_BIT_MASK;
+       slab1 = bmp->array1 + index1;
+       *slab1 &= ~(1llu << offset1);
+
+       return;
+}
+
+/* Search array1 for the next set summary bit, starting strictly after the
+ * current (index1, offset1) position, wrapping around the whole array1.
+ * On success stores the position in (index1, offset1) and returns 1;
+ * returns 0 when no array1 bit is set (empty bitmap). */
+static inline int
+__rte_bitmap_scan_search(struct rte_bitmap *bmp)
+{
+       uint64_t value1;
+       uint32_t i;
+       
+       /* Check current array1 slab, masking out the current bit and all
+        * bits before it so the scan continues strictly past it */
+       value1 = bmp->array1[bmp->index1];
+       value1 &= __rte_bitmap_mask1_get(bmp);
+       
+       if (rte_bsf64(value1, &bmp->offset1)) {
+               return 1;
+       }
+       
+       __rte_bitmap_index1_inc(bmp);
+       bmp->offset1 = 0;
+       
+       /* Look for another array1 slab, visiting all slabs once with
+        * wrap-around (may revisit the starting slab last) */
+       for (i = 0; i < RTE_BITMAP_ARRAY1_SIZE; i ++, __rte_bitmap_index1_inc(bmp)) {
+               value1 = bmp->array1[bmp->index1];
+               
+               if (rte_bsf64(value1, &bmp->offset1)) {
+                       return 1;
+               }
+       }
+       
+       return 0;
+}
+
+static inline void
+__rte_bitmap_scan_read_init(struct rte_bitmap *bmp)
+{
+       /* Point index2 at the array2 cache line selected by (index1,
+        * offset1) and mark the line read as in progress; prefetch the
+        * slabs just past the current line (presumably the next line to be
+        * scanned — confirm intent) */
+       __rte_bitmap_index2_set(bmp);
+       bmp->go2 = 1;
+       rte_prefetch1((void *)(bmp->array2 + bmp->index2 + 8));
+}
+
+/* Read the next non-zero slab from the array2 cache line currently in
+ * progress. go2 is the loop condition: it becomes 0 once index2 crosses a
+ * cache-line boundary (index2 & CL_SLAB_MASK == 0). On success stores the
+ * slab's base bit position in *pos, the slab value in *slab, advances past
+ * the slab and returns 1; returns 0 when the line is exhausted. */
+static inline int
+__rte_bitmap_scan_read(struct rte_bitmap *bmp, uint32_t *pos, uint64_t *slab)
+{
+       uint64_t *slab2;
+       
+       slab2 = bmp->array2 + bmp->index2;
+       /* Note: the comma expression updates index2/slab2 first, then
+        * recomputes go2 from the already-incremented index2 */
+       for ( ; bmp->go2 ; bmp->index2 ++, slab2 ++, bmp->go2 = bmp->index2 & RTE_BITMAP_CL_SLAB_MASK) {
+               if (*slab2) {
+                       *pos = bmp->index2 << RTE_BITMAP_SLAB_BIT_SIZE_LOG2;
+                       *slab = *slab2;
+                       
+                       /* Advance past this slab so it is not returned again */
+                       bmp->index2 ++;
+                       slab2 ++;
+                       bmp->go2 = bmp->index2 & RTE_BITMAP_CL_SLAB_MASK;
+                       return 1;
+               }
+       }
+       
+       return 0;
+}
+
+/**
+ * Bitmap scan (with automatic wrap-around)
+ *
+ * @param bmp
+ *   Handle to bitmap instance
+ * @param pos
+ *   When function call returns 1, pos contains the position of the next set
+ *   bit, otherwise not modified
+ * @param slab
+ *   When function call returns 1, slab contains the value of the entire 64-bit
+ *   slab where the bit indicated by pos is located. Slabs are always 64-bit
+ *   aligned, so the position of the first bit of the slab (this bit is not
+ *   necessarily set) is pos / 64. Once a slab has been returned by the bitmap
+ *   scan operation, the internal pointers of the bitmap are updated to point
+ *   after this slab, so the same slab will not be returned again if it
+ *   contains more than one bit which is set. When function call returns 0,
+ *   slab is not modified.
+ * @return
+ *   0 if there is no bit set in the bitmap, 1 otherwise
+ */
+static inline int
+rte_bitmap_scan(struct rte_bitmap *bmp, uint32_t *pos, uint64_t *slab)
+{
+       /* Serve from the array2 cache line currently being read, if any */
+       if (__rte_bitmap_scan_read(bmp, pos, slab)) {
+               return 1;
+       }
+       
+       /* No line in progress: search array1 for the next non-empty line */
+       if (__rte_bitmap_scan_search(bmp) == 0) {
+               /* Empty bitmap */
+               return 0;
+       }
+       
+       __rte_bitmap_scan_read_init(bmp);
+       __rte_bitmap_scan_read(bmp, pos, slab);
+       return 1;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __INCLUDE_RTE_BITMAP_H__ */
diff --git a/lib/librte_sched/rte_red.c b/lib/librte_sched/rte_red.c
new file mode 100644 (file)
index 0000000..0eaf5a0
--- /dev/null
@@ -0,0 +1,160 @@
+/*-
+ *   BSD LICENSE
+ * 
+ *   Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ * 
+ *   Redistribution and use in source and binary forms, with or without 
+ *   modification, are permitted provided that the following conditions 
+ *   are met:
+ * 
+ *     * Redistributions of source code must retain the above copyright 
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright 
+ *       notice, this list of conditions and the following disclaimer in 
+ *       the documentation and/or other materials provided with the 
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its 
+ *       contributors may be used to endorse or promote products derived 
+ *       from this software without specific prior written permission.
+ * 
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * 
+ */
+
+#include <math.h>
+#include "rte_red.h"
+#include <rte_random.h>
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:2259) /* conversion may lose significant bits */
+#endif
+
+#define DIM(x) (sizeof(x)/sizeof(x[0]))
+
+static int rte_red_init_done = 0;     /**< Flag to indicate that global initialisation is done */
+uint32_t rte_red_rand_val = 0;        /**< Random value cache */
+uint32_t rte_red_rand_seed = 0;       /**< Seed for random number generation */
+
+/**
+ * table[i] = log2(1-Wq) * Scale * -1
+ *       Wq = 1/(2^i)
+ */
+uint16_t rte_red_log2_1_minus_Wq[RTE_RED_WQ_LOG2_NUM];
+
+/**
+ * table[i] = 2^(-i/16) * Scale
+ * (the code computes round(scale / 2^(i/16)), i.e. inverse fractional
+ * powers of two)
+ */
+
+/**
+ * @brief Initialize tables used to compute average
+ *        queue size when queue is empty.
+ */
+static void
+__rte_red_init_tables(void)
+{
+       uint32_t i = 0;
+       double scale = 0.0;
+       double table_size = 0.0;
+
+       scale = (double)(1 << RTE_RED_SCALING);
+       table_size = (double)(DIM(rte_red_pow2_frac_inv));
+
+       /* pow2_frac_inv[i] = round(scale / 2^(i/16)): inverse fractional
+        * powers of two, scaled to fixed point */
+       for (i = 0; i < DIM(rte_red_pow2_frac_inv); i++) {
+               double m = (double)i;
+               
+               rte_red_pow2_frac_inv[i] = (uint16_t) round(scale / pow(2, m / table_size));
+       }
+       
+       /* The log2 table uses its own scale factor of 1024 (matches the
+        * ">> 10" extraction in __rte_red_calc_qempty_factor) */
+       scale = 1024.0;
+
+       RTE_RED_ASSERT(RTE_RED_WQ_LOG2_NUM == DIM(rte_red_log2_1_minus_Wq));
+
+       /* log2_1_minus_Wq[i] = -log2(1 - 1/2^wq_log2) * 1024, stored as a
+        * positive value to avoid signed arithmetic */
+       for (i = RTE_RED_WQ_LOG2_MIN; i <= RTE_RED_WQ_LOG2_MAX; i++) {
+               double n = (double)i;
+               double Wq = pow(2, -n);
+               uint32_t index = i - RTE_RED_WQ_LOG2_MIN;
+               
+               rte_red_log2_1_minus_Wq[index] = (uint16_t) round(-1.0 * scale * log2(1.0 - Wq));
+               /**
+               * Table entry of zero, corresponds to a Wq of zero
+               * which is not valid (avg would remain constant no
+               * matter how long the queue is empty). So we have
+               * to check for zero and round up to one.
+               */
+               if (rte_red_log2_1_minus_Wq[index] == 0) {
+                       rte_red_log2_1_minus_Wq[index] = 1;
+               }
+       }
+}
+
+/* Reset per-queue RED run-time state to "empty queue, no packets seen".
+ * Returns 0 on success, -1 on NULL handle. */
+int
+rte_red_rt_data_init(struct rte_red *red)
+{
+       if (red == NULL)
+               return -1;
+
+       red->q_time = 0;
+       red->count = 0;
+       red->avg = 0;
+       return 0;
+}
+
+/* Validate and precompute a RED configuration; returns 0 on success or a
+ * distinct negative code identifying the offending parameter. */
+int
+rte_red_config_init(struct rte_red_config *red_cfg,
+       const uint16_t wq_log2,
+       const uint16_t min_th,
+       const uint16_t max_th,
+       const uint16_t maxp_inv)
+{
+       if (red_cfg == NULL) {
+               return -1;
+       }
+       if (max_th > RTE_RED_MAX_TH_MAX) {
+               return -2;
+       }
+       if (min_th >= max_th) {
+               return -3;
+       }
+       if (wq_log2 > RTE_RED_WQ_LOG2_MAX) {
+               return -4;
+       }
+       if (wq_log2 < RTE_RED_WQ_LOG2_MIN) {
+               return -5;
+       }
+       if (maxp_inv < RTE_RED_MAXP_INV_MIN) {
+               return -6;
+       }
+       if (maxp_inv > RTE_RED_MAXP_INV_MAX) {
+               return -7;
+       }
+       
+       /**
+        *  Initialize the RED module if not already done.
+        *  NOTE(review): this lazy global initialization is not thread-safe
+        *  (unsynchronized check of rte_red_init_done) — confirm that all
+        *  configs are created from a single thread at setup time.
+        */
+       if (!rte_red_init_done) {
+               rte_red_rand_seed = rte_rand();
+               rte_red_rand_val = rte_fast_rand();
+               __rte_red_init_tables();
+               rte_red_init_done = 1;
+       }
+
+       /* Thresholds are pre-scaled by (wq_log2 + RTE_RED_SCALING) so they
+        * compare directly against the scaled running average */
+       red_cfg->min_th = ((uint32_t) min_th) << (wq_log2 + RTE_RED_SCALING);
+       red_cfg->max_th = ((uint32_t) max_th) << (wq_log2 + RTE_RED_SCALING);
+       red_cfg->pa_const = (2 * (max_th - min_th) * maxp_inv) << RTE_RED_SCALING;
+       red_cfg->maxp_inv = maxp_inv;
+       red_cfg->wq_log2 = wq_log2;
+
+       return 0;
+}
diff --git a/lib/librte_sched/rte_red.h b/lib/librte_sched/rte_red.h
new file mode 100644 (file)
index 0000000..debe556
--- /dev/null
@@ -0,0 +1,454 @@
+/*-
+ *   BSD LICENSE
+ * 
+ *   Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ * 
+ *   Redistribution and use in source and binary forms, with or without 
+ *   modification, are permitted provided that the following conditions 
+ *   are met:
+ * 
+ *     * Redistributions of source code must retain the above copyright 
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright 
+ *       notice, this list of conditions and the following disclaimer in 
+ *       the documentation and/or other materials provided with the 
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its 
+ *       contributors may be used to endorse or promote products derived 
+ *       from this software without specific prior written permission.
+ * 
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * 
+ */
+
+#ifndef __RTE_RED_H_INCLUDED__
+#define __RTE_RED_H_INCLUDED__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Random Early Detection (RED)
+ *
+ *
+ ***/
+
+#include <stdint.h>
+#include <limits.h>
+#include <rte_common.h>
+#include <rte_debug.h>
+#include <rte_cycles.h>
+#include <rte_branch_prediction.h>
+
+#define RTE_RED_SCALING                     10         /**< Fraction size for fixed-point */
+#define RTE_RED_S                           (1 << 22)  /**< Packet size multiplied by number of leaf queues */
+#define RTE_RED_MAX_TH_MAX                  1023       /**< Max threshold limit in fixed point format */
+#define RTE_RED_WQ_LOG2_MIN                 1          /**< Min inverse filter weight value */
+#define RTE_RED_WQ_LOG2_MAX                 12         /**< Max inverse filter weight value */
+#define RTE_RED_MAXP_INV_MIN                1          /**< Min inverse mark probability value */
+#define RTE_RED_MAXP_INV_MAX                255        /**< Max inverse mark probability value */
+#define RTE_RED_2POW16                      (1<<16)    /**< 2 power 16 */
+#define RTE_RED_INT16_NBITS                 (sizeof(uint16_t) * CHAR_BIT)
+#define RTE_RED_WQ_LOG2_NUM                 (RTE_RED_WQ_LOG2_MAX - RTE_RED_WQ_LOG2_MIN + 1)
+
+#ifdef RTE_RED_DEBUG
+
+#define RTE_RED_ASSERT(exp)                                      \
+if (!(exp)) {                                                    \
+       rte_panic("line%d\tassert \"" #exp "\" failed\n", __LINE__); \
+}
+
+#else
+
+#define RTE_RED_ASSERT(exp)                 do { } while(0)
+
+#endif /* RTE_RED_DEBUG */
+
+/**
+ * Externs
+ * 
+ */
+extern uint32_t rte_red_rand_val;
+extern uint32_t rte_red_rand_seed;
+extern uint16_t rte_red_log2_1_minus_Wq[RTE_RED_WQ_LOG2_NUM];
+extern uint16_t rte_red_pow2_frac_inv[16];
+
+/**
+ * RED configuration parameters passed by user
+ * 
+ */
+struct rte_red_params {
+       uint16_t min_th;   /**< Minimum threshold for queue (min_th) */
+       uint16_t max_th;   /**< Maximum threshold for queue (max_th) */
+       uint16_t maxp_inv; /**< Inverse of packet marking probability maximum value (maxp = 1 / maxp_inv) */
+       uint16_t wq_log2;  /**< Negated log2 of queue weight (wq = 1 / (2 ^ wq_log2)) */
+};
+
+/**
+ * RED configuration parameters (precomputed by rte_red_config_init())
+ */
+struct rte_red_config {
+       uint32_t min_th;   /**< min_th scaled in fixed-point format */
+       uint32_t max_th;   /**< max_th scaled in fixed-point format */
+       uint32_t pa_const; /**< Precomputed constant 2 * (max_th - min_th) * maxp_inv, used for pa calculation (scaled in fixed-point format) */
+       uint8_t maxp_inv;  /**< maxp_inv */
+       uint8_t wq_log2;   /**< wq_log2 */
+};
+
+/**
+ * RED run-time data, one instance per queue
+ */
+struct rte_red {
+       uint32_t avg;      /**< Average queue size (avg), scaled in fixed-point format */
+       uint32_t count;    /**< Number of packets since last marked packet (count) */
+       uint64_t q_time;   /**< Start of the queue idle time (q_time) */
+};
+
+/** 
+ * @brief Initialises run-time data
+ *  
+ * @param [in,out] data pointer to RED runtime data
+ *
+ * @return Operation status
+ * @retval 0 success
+ * @retval !0 error
+ */
+int
+rte_red_rt_data_init(struct rte_red *red);
+
+/** 
+ * @brief Configures a single RED configuration parameter structure.
+ * 
+ * @param [in,out] config pointer to a RED configuration parameter structure
+ * @param [in] wq_log2 log2 of the filter weight, valid range is:
+ *             RTE_RED_WQ_LOG2_MIN <= wq_log2 <= RTE_RED_WQ_LOG2_MAX
+ * @param [in] min_th queue minimum threshold in number of packets
+ * @param [in] max_th queue maximum threshold in number of packets
+ * @param [in] maxp_inv inverse maximum mark probability
+ * 
+ * @return Operation status
+ * @retval 0 success
+ * @retval !0 error
+ */
+int
+rte_red_config_init(struct rte_red_config *red_cfg,
+       const uint16_t wq_log2,
+       const uint16_t min_th,
+       const uint16_t max_th,
+       const uint16_t maxp_inv);
+
+/**
+ * @brief Generate random number for RED
+ *
+ * Implementation based on:
+ * http://software.intel.com/en-us/articles/fast-random-number-generator-on-the-intel-pentiumr-4-processor/
+ *
+ * 10 bit shift has been found through empirical tests (was 16).
+ *
+ * Note: advances the global rte_red_rand_seed (LCG step), so this is not
+ * thread-safe across concurrent callers.
+ *
+ * @return Random number between 0 and (2^22 - 1)
+ */
+static inline uint32_t
+rte_fast_rand(void)
+{
+       rte_red_rand_seed = (214013 * rte_red_rand_seed) + 2531011;
+       return (rte_red_rand_seed >> 10);
+}
+
+/**
+ * @brief calculate factor to scale average queue size when queue
+ *        becomes empty
+ *
+ * @param [in] wq_log2, where EWMA filter weight wq = 1/(2 ^ wq_log2)
+ * @param [in] m exponent in the computed value (1 - wq) ^ m
+ *
+ * @return computed value
+ * @retval ((1 - wq) ^ m) scaled in fixed-point format
+ */
+static inline uint16_t
+__rte_red_calc_qempty_factor(uint8_t wq_log2, uint16_t m)
+{
+       uint32_t n = 0;
+       uint32_t f = 0;
+
+       /**
+        * Basic math tells us that:
+        *   a^b = 2^(b * log2(a) )
+        *
+        * in our case:
+        *   a = (1-Wq)
+        *   b = m
+        *  Wq = 1/ (2^log2n)
+        *
+        * So we are computing this equation:
+        *   factor = 2 ^ ( m * log2(1-Wq))
+        *
+        * First we are computing:
+        *    n = m * log2(1-Wq)
+        *
+        * To avoid dealing with signed numbers log2 values are positive
+        * but they should be negative because (1-Wq) is always < 1.
+        * Contents of log2 table values are also scaled for precision.
+        */
+
+       n = m * rte_red_log2_1_minus_Wq[wq_log2 - RTE_RED_WQ_LOG2_MIN];
+
+       /**
+        * The tricky part is computing 2^n, for this I split n into
+        * integer part and fraction part.
+        *   f - is fraction part of n
+        *   n - is integer part of original n
+        *
+        * Now using basic math we compute 2^n:
+        *   2^(f+n) = 2^f * 2^n
+        *   2^f - we use lookup table
+        *   2^n - can be replaced with bit shift right operations
+        */
+
+       f = (n >> 6) & 0xf;
+       n >>= 10;
+
+       /**
+        * When n == 0 there is no integer shift, so no rounding term is
+        * needed; the original "(1 << (n - 1))" expression is undefined
+        * behavior for n == 0 (shift count of (uint32_t)-1).
+        */
+       if (n == 0)
+               return (uint16_t) rte_red_pow2_frac_inv[f];
+
+       if (n < RTE_RED_SCALING)
+               return (uint16_t) ((rte_red_pow2_frac_inv[f] + (1 << (n - 1))) >> n);
+
+       return 0;
+}
+
+/**
+ * @brief Updates queue average in condition when queue is empty
+ *
+ * Note: packet is never dropped in this particular case.
+ *
+ * @param [in] red_cfg pointer to a RED configuration parameter structure
+ * @param [in,out] red pointer to RED runtime data
+ * @param [in] time current time stamp
+ *
+ * @return Operation status
+ * @retval 0 always enqueue the packet (only the average is updated here)
+ */
+static inline int
+rte_red_enqueue_empty(const struct rte_red_config *red_cfg,
+	struct rte_red *red,
+	const uint64_t time)
+{
+	uint64_t n_slots = 0;
+
+	RTE_RED_ASSERT(red_cfg != NULL);
+	RTE_RED_ASSERT(red != NULL);
+
+	red->count ++;
+
+	/**
+	 * Only avg is recomputed here; it is not compared against
+	 * min_th/max_th and no drop probability is calculated.
+	 *
+	 * n_slots approximates the number of packets that might have
+	 * arrived while the queue was empty. Time stamps are provided by
+	 * the scheduler in byte units (bytes transmitted on the network
+	 * port); such a time stamp translates into time units as the port
+	 * speed is fixed, and this approach simplifies the code.
+	 */
+	n_slots = (time - red->q_time) / RTE_RED_S;
+
+	/* The decay factor helper only accepts a 16-bit exponent */
+	if (n_slots >= RTE_RED_2POW16) {
+		red->avg = 0;
+	} else {
+		/* avg = avg * (1 - wq) ^ n_slots, in fixed point */
+		red->avg = (red->avg >> RTE_RED_SCALING) *
+			__rte_red_calc_qempty_factor(red_cfg->wq_log2, (uint16_t) n_slots);
+	}
+
+	return 0;
+}
+
+/**
+ *  Drop probability (Sally Floyd and Van Jacobson):
+ *
+ *     pb = (1 / maxp_inv) * (avg - min_th) / (max_th - min_th)
+ *     pa = pb / (2 - count * pb)
+ *
+ *
+ *                 (1 / maxp_inv) * (avg - min_th)
+ *                ---------------------------------
+ *                         max_th - min_th
+ *     pa = -----------------------------------------------
+ *                count * (1 / maxp_inv) * (avg - min_th)
+ *           2 - -----------------------------------------
+ *                          max_th - min_th
+ *
+ *
+ *                                  avg - min_th
+ *     pa = -----------------------------------------------------------
+ *           2 * (max_th - min_th) * maxp_inv - count * (avg - min_th)
+ *
+ *
+ *  We define pa_const as: pa_const =  2 * (max_th - min_th) * maxp_inv. Then:
+ *
+ *
+ *                     avg - min_th
+ *     pa = -----------------------------------
+ *           pa_const - count * (avg - min_th)
+ */
+
+/**
+ * @brief make a decision to drop or enqueue a packet based on mark probability
+ *        criteria
+ *
+ * Implements pa = num / (pa_const - count * num) using the pre-computed
+ * pa_const from the configuration (see the derivation above).
+ *
+ * @param [in] red_cfg pointer to structure defining RED parameters
+ * @param [in,out] red pointer to RED runtime data
+ *
+ * @return operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet
+ */
+static inline int
+__rte_red_drop(const struct rte_red_config *red_cfg, struct rte_red *red)
+{
+	uint32_t num = 0;	/* numerator of drop-probability */
+	uint32_t den = 0;	/* denominator of drop-probability */
+	uint32_t count_num = 0;
+
+	num = (red->avg - red_cfg->min_th) >> (red_cfg->wq_log2);
+	count_num = red->count * num;
+
+	/* Denominator would be zero or negative: drop unconditionally */
+	if (red_cfg->pa_const <= count_num)
+		return 1;
+
+	den = red_cfg->pa_const - count_num;
+
+	if (unlikely((rte_red_rand_val % den) < num)) {
+		/* Drop: generate and save random number to be used next time */
+		rte_red_rand_val = rte_fast_rand();
+		return 1;
+	}
+
+	/* No drop */
+	return 0;
+}
+
+/**
+ * @brief Decides if new packet should be enqeued or dropped in queue non-empty case
+ *
+ * @param [in] red_cfg pointer to a RED configuration parameter structure
+ * @param [in,out] red pointer to RED runtime data
+ * @param [in] q current queue size (measured in packets)
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet based on max threshold criterion
+ * @retval 2 drop the packet based on mark probability criterion
+ */
+static inline int
+rte_red_enqueue_nonempty(const struct rte_red_config *red_cfg,
+	struct rte_red *red,
+	const unsigned q)
+{
+	RTE_RED_ASSERT(red_cfg != NULL);
+	RTE_RED_ASSERT(red != NULL);
+
+	/**
+	 * EWMA filter (Sally Floyd and Van Jacobson):
+	 *    avg = (1 - wq) * avg + wq * q
+	 *
+	 * With wq = 2^(-n) and the scaled average avg_s = avg * 2^(N+n),
+	 * the update reduces to shift operations:
+	 *    avg_s += (q << N) - (avg_s >> n)
+	 */
+	red->avg += (q << RTE_RED_SCALING) - (red->avg >> red_cfg->wq_log2);
+
+	/* avg < min_th: do not mark the packet */
+	if (red->avg < red_cfg->min_th) {
+		red->count ++;
+		return 0;
+	}
+
+	/* max_th <= avg: always mark the packet */
+	if (red->avg >= red_cfg->max_th) {
+		red->count = 0;
+		return 1;
+	}
+
+	/* min_th <= avg < max_th: mark the packet with pa probability */
+	if (__rte_red_drop(red_cfg, red)) {
+		red->count = 0;
+		return 2;
+	}
+
+	red->count ++;
+	return 0;
+}
+
+/**
+ * @brief Decides if new packet should be enqeued or dropped
+ *
+ * Updates run time data based on new queue size value, then, based on
+ * the new queue average and the RED configuration parameters, gives the
+ * verdict whether to enqueue or drop the packet.
+ *
+ * @param [in] red_cfg pointer to a RED configuration parameter structure
+ * @param [in,out] red pointer to RED runtime data
+ * @param [in] q updated queue size in packets
+ * @param [in] time current time stamp
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet based on max threshold criteria
+ * @retval 2 drop the packet based on mark probability criteria
+ */
+static inline int
+rte_red_enqueue(const struct rte_red_config *red_cfg,
+	struct rte_red *red,
+	const unsigned q,
+	const uint64_t time)
+{
+	RTE_RED_ASSERT(red_cfg != NULL);
+	RTE_RED_ASSERT(red != NULL);
+
+	/* Dispatch on current queue occupancy */
+	return (q == 0) ?
+		rte_red_enqueue_empty(red_cfg, red, time) :
+		rte_red_enqueue_nonempty(red_cfg, red, q);
+}
+
+/**
+ * @brief Callback to record the time at which the queue became empty
+ *
+ * The saved time stamp is read back by rte_red_enqueue_empty() to
+ * estimate how long the queue has been idle when computing the average.
+ *
+ * @param [in,out] red pointer to RED runtime data
+ * @param [in] time current time stamp
+ */
+static inline void
+rte_red_mark_queue_empty(struct rte_red *red, const uint64_t time)
+{
+	red->q_time = time;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __RTE_RED_H_INCLUDED__ */
diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c
new file mode 100644 (file)
index 0000000..daa1a0d
--- /dev/null
@@ -0,0 +1,2129 @@
+/*-
+ *   BSD LICENSE
+ * 
+ *   Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ * 
+ *   Redistribution and use in source and binary forms, with or without 
+ *   modification, are permitted provided that the following conditions 
+ *   are met:
+ * 
+ *     * Redistributions of source code must retain the above copyright 
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright 
+ *       notice, this list of conditions and the following disclaimer in 
+ *       the documentation and/or other materials provided with the 
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its 
+ *       contributors may be used to endorse or promote products derived 
+ *       from this software without specific prior written permission.
+ * 
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * 
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_cycles.h>
+#include <rte_prefetch.h>
+#include <rte_branch_prediction.h>
+#include <rte_mbuf.h>
+
+#include "rte_sched.h"
+#include "rte_bitmap.h"
+#include "rte_sched_common.h"
+#include "rte_approx.h"
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:2259) /* conversion may lose significant bits */
+#endif
+
+/* Extra debug support; 0 disables it */
+#ifndef RTE_SCHED_DEBUG
+#define RTE_SCHED_DEBUG                       0
+#endif
+
+/* When set, x86 SIMD intrinsics (immintrin.h) are pulled in below */
+#ifndef RTE_SCHED_OPTIMIZATIONS
+#define RTE_SCHED_OPTIMIZATIONS                          0
+#endif
+
+#if RTE_SCHED_OPTIMIZATIONS
+#include <immintrin.h>
+#endif
+
+/* NOTE(review): compile-time switch; the effect of setting it to 0 is
+ * not visible in this part of the file — confirm before changing. */
+#define RTE_SCHED_ENQUEUE                     1
+
+/* Traffic shaping master switch, controls the credit macros below */
+#define RTE_SCHED_TS                          1
+
+#if RTE_SCHED_TS == 0 /* Infinite credits. Traffic shaping disabled. */
+#define RTE_SCHED_TS_CREDITS_UPDATE           0
+#define RTE_SCHED_TS_CREDITS_CHECK            0
+#else                 /* Real Credits. Full traffic shaping implemented. */
+#define RTE_SCHED_TS_CREDITS_UPDATE           1
+#define RTE_SCHED_TS_CREDITS_CHECK            1
+#endif
+
+/* Max relative error accepted by rte_approx() for token bucket rates */
+#ifndef RTE_SCHED_TB_RATE_CONFIG_ERR
+#define RTE_SCHED_TB_RATE_CONFIG_ERR          (1e-7)
+#endif
+
+/* Weighted Round Robin within a traffic class; 0 would disable it */
+#define RTE_SCHED_WRR                         1
+
+#ifndef RTE_SCHED_WRR_SHIFT
+#define RTE_SCHED_WRR_SHIFT                   3
+#endif
+
+/* Number of parallel pipe "grinders" in the dequeue state machine */
+#ifndef RTE_SCHED_PORT_N_GRINDERS
+#define RTE_SCHED_PORT_N_GRINDERS             8
+#endif
+#if (RTE_SCHED_PORT_N_GRINDERS == 0) || (RTE_SCHED_PORT_N_GRINDERS & (RTE_SCHED_PORT_N_GRINDERS - 1))
+#error Number of grinders must be non-zero and a power of 2
+#endif
+#if (RTE_SCHED_OPTIMIZATIONS && (RTE_SCHED_PORT_N_GRINDERS != 8))
+#error Number of grinders must be 8 when RTE_SCHED_OPTIMIZATIONS is set
+#endif
+
+#define RTE_SCHED_GRINDER_PCACHE_SIZE         (64 / RTE_SCHED_QUEUES_PER_PIPE)
+
+/* Sentinel values for "no pipe" / "no bitmap position" */
+#define RTE_SCHED_PIPE_INVALID                UINT32_MAX
+
+#define RTE_SCHED_BMP_POS_INVALID             UINT32_MAX
+
+/**
+ * Per-subport run-time state: subport-level token bucket, per-traffic-class
+ * credits, TC oversubscription bookkeeping and statistics.
+ */
+struct rte_sched_subport {
+	/* Token bucket (TB) */
+	uint64_t tb_time; /* time of last update */
+	uint32_t tb_period;
+	uint32_t tb_credits_per_period; /* credits added every tb_period */
+	uint32_t tb_size; /* bucket capacity (credit cap) */
+	uint32_t tb_credits; /* currently available credits */
+
+	/* Traffic classes (TCs) */
+	uint64_t tc_time; /* time of next update */
+	uint32_t tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	uint32_t tc_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	uint32_t tc_period;
+
+	/* TC oversubscription */
+	uint32_t tc_ov_period;
+	uint64_t tc_ov_time;
+	uint32_t tc_ov_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	uint8_t tc_ov_period_id;
+	uint8_t tc_ov[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; /* oversubscription flag per TC */
+	uint32_t tc_ov_n[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	double tc_ov_rate[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+
+	/* Statistics */
+	struct rte_sched_subport_stats stats;
+};
+
+/**
+ * Pipe configuration template, shared by all pipes using the same profile.
+ * Filled in by rte_sched_port_config_pipe_profile_table() from the
+ * user-supplied struct rte_sched_pipe_params (rates converted to
+ * credits-per-period, WRR weights converted to costs).
+ */
+struct rte_sched_pipe_profile {
+	/* Token bucket (TB) */
+	uint32_t tb_period;
+	uint32_t tb_credits_per_period;
+	uint32_t tb_size;
+
+	/* Pipe traffic classes */
+	uint32_t tc_period;
+	uint32_t tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	uint8_t tc_ov_weight[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+
+	/* Pipe queues: WRR cost per queue (inversely derived from weight) */
+	uint8_t  wrr_cost[RTE_SCHED_QUEUES_PER_PIPE];
+};
+
+/**
+ * Per-pipe run-time state (one cache line per pipe).
+ */
+struct rte_sched_pipe {
+	/* Token bucket (TB) */
+	uint64_t tb_time; /* time of last update */
+	uint32_t tb_credits;
+
+	/* Pipe profile and flags */
+	uint32_t profile; /* pipe profile index — presumably into port->pipe_profiles; confirm */
+
+	/* Traffic classes (TCs) */
+	uint64_t tc_time; /* time of next update */
+	uint32_t tc_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+
+	/* Weighted Round Robin (WRR) */
+	uint8_t wrr_tokens[RTE_SCHED_QUEUES_PER_PIPE];
+
+	/* TC oversubscription */
+#ifdef RTE_SCHED_SUBPORT_TC_OV
+	uint32_t tc_ov_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	uint8_t tc_ov_period_id;
+#else
+	/* keep the structure size stable when TC_OV is compiled out */
+	uint64_t reserved;
+#endif
+} __rte_cache_aligned;
+
+/**
+ * Per-queue state: 16-bit write/read pointers (hence the 32K queue size
+ * limit noted in rte_sched_port_check_params()).
+ */
+struct rte_sched_queue {
+	uint16_t qw; /* write pointer */
+	uint16_t qr; /* read pointer */
+};
+
+/* Per-queue data kept out of the hot struct: stats and (optional) RED state */
+struct rte_sched_queue_extra {
+	struct rte_sched_queue_stats stats;
+#ifdef RTE_SCHED_RED
+	struct rte_red red;
+#endif
+};
+
+/* States of the grinder pipeline; ordering suggests the prefetch sequence
+ * pipe -> TC queue arrays -> mbuf -> read (state machine not in this view). */
+enum grinder_state {
+	e_GRINDER_PREFETCH_PIPE = 0,
+	e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS,
+	e_GRINDER_PREFETCH_MBUF,
+	e_GRINDER_READ_MBUF
+};
+
+/**
+ * Per-grinder working state for the dequeue state machine. A grinder
+ * processes one pipe at a time; the port has RTE_SCHED_PORT_N_GRINDERS
+ * of them (see struct rte_sched_port).
+ */
+struct rte_sched_grinder {
+	/* Pipe cache: small FIFO of candidate pipes (mask + index pairs) */
+	uint16_t pcache_qmask[RTE_SCHED_GRINDER_PCACHE_SIZE];
+	uint32_t pcache_qindex[RTE_SCHED_GRINDER_PCACHE_SIZE];
+	uint32_t pcache_w; /* write position */
+	uint32_t pcache_r; /* read position */
+
+	/* Current pipe */
+	enum grinder_state state;
+	uint32_t productive;
+	uint32_t pindex;
+	struct rte_sched_subport *subport;
+	struct rte_sched_pipe *pipe;
+	struct rte_sched_pipe_profile *pipe_params;
+
+	/* TC cache: one entry per traffic class of the current pipe */
+	uint8_t tccache_qmask[4];
+	uint32_t tccache_qindex[4];
+	uint32_t tccache_w;
+	uint32_t tccache_r;
+
+	/* Current TC */
+	uint32_t tc_index;
+	struct rte_sched_queue *queue[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	struct rte_mbuf **qbase[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	uint32_t qindex[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	uint16_t qsize;
+	uint32_t qmask;
+	uint32_t qpos;
+	struct rte_mbuf *pkt;
+
+	/* TC oversubscription coefficient — semantics not visible here; confirm */
+	double ov_coef;
+
+	/* WRR state for the queues of the current TC */
+	uint16_t wrr_tokens[RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS];
+	uint16_t wrr_mask[RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS];
+	uint8_t wrr_cost[RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS];
+};
+
+/**
+ * Port scheduler instance. Allocated as a single memzone; the variable
+ * sized arrays (subports, pipes, queues, ...) live in the trailing
+ * flexible "memory" region at offsets computed by
+ * rte_sched_port_get_array_base().
+ */
+struct rte_sched_port {
+	/* User parameters (copied from struct rte_sched_port_params) */
+	uint32_t n_subports_per_port;
+	uint32_t n_pipes_per_subport;
+	uint32_t rate;
+	uint32_t frame_overhead;
+	uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	uint32_t n_pipe_profiles;
+#ifdef RTE_SCHED_RED
+	struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][e_RTE_METER_COLORS];
+#endif
+
+	/* Timing */
+	uint64_t time_cpu_cycles;     /* Current CPU time measured in CPU cyles */
+	uint64_t time_cpu_bytes;      /* Current CPU time measured in bytes */
+	uint64_t time;                /* Current NIC TX time measured in bytes */
+	double cycles_per_byte;       /* CPU cycles per byte */
+
+	/* Scheduling loop detection */
+	uint32_t pipe_loop;
+	uint32_t pipe_exhaustion;
+
+	/* Bitmap of active queues; bmp_array below is its backing storage */
+	struct rte_bitmap bmp;
+	uint32_t grinder_base_bmp_pos[RTE_SCHED_PORT_N_GRINDERS] __rte_aligned_16;
+
+	/* Grinders */
+	struct rte_sched_grinder grinder[RTE_SCHED_PORT_N_GRINDERS];
+	uint32_t busy_grinders;
+	struct rte_mbuf **pkts_out;
+	uint32_t n_pkts_out;
+
+	/* Queue base calculation (see rte_sched_port_config_qsize()) */
+	uint32_t qsize_add[RTE_SCHED_QUEUES_PER_PIPE];
+	uint32_t qsize_sum;
+
+	/* Pointers into the trailing "memory" region below */
+	struct rte_sched_subport *subport;
+	struct rte_sched_pipe *pipe;
+	struct rte_sched_queue *queue;
+	struct rte_sched_queue_extra *queue_extra;
+	struct rte_sched_pipe_profile *pipe_profiles;
+	uint8_t *bmp_array;
+	struct rte_mbuf **queue_array;
+	uint8_t memory[0] __rte_cache_aligned; /* variable-size storage */
+} __rte_cache_aligned;
+
+/* Identifiers of the arrays packed into rte_sched_port's memory region.
+ * The order here defines the memory layout used by
+ * rte_sched_port_get_array_base(); do not reorder. */
+enum rte_sched_port_array {
+	e_RTE_SCHED_PORT_ARRAY_SUBPORT = 0,
+	e_RTE_SCHED_PORT_ARRAY_PIPE,
+	e_RTE_SCHED_PORT_ARRAY_QUEUE,
+	e_RTE_SCHED_PORT_ARRAY_QUEUE_EXTRA,
+	e_RTE_SCHED_PORT_ARRAY_PIPE_PROFILES,
+	e_RTE_SCHED_PORT_ARRAY_BMP_ARRAY,
+	e_RTE_SCHED_PORT_ARRAY_QUEUE_ARRAY,
+	e_RTE_SCHED_PORT_ARRAY_TOTAL,
+};
+
+#ifdef RTE_SCHED_COLLECT_STATS
+
+/* Total number of queues of one subport; only compiled in when
+ * statistics collection is enabled */
+static inline uint32_t
+rte_sched_port_queues_per_subport(struct rte_sched_port *port)
+{
+	return RTE_SCHED_QUEUES_PER_PIPE * port->n_pipes_per_subport;
+}
+
+#endif
+
+/* Total number of queues managed by the port */
+static inline uint32_t
+rte_sched_port_queues_per_port(struct rte_sched_port *port)
+{
+	uint32_t n_pipes_per_port = port->n_pipes_per_subport * port->n_subports_per_port;
+
+	return RTE_SCHED_QUEUES_PER_PIPE * n_pipes_per_port;
+}
+
+/**
+ * Validate the user-supplied port configuration.
+ *
+ * @return 0 on success, or a distinct negative error code identifying
+ *         the first check that failed (-1 .. -14).
+ */
+static int
+rte_sched_port_check_params(struct rte_sched_port_params *params)
+{
+	uint32_t i, j;
+	
+	if (params == NULL) {
+		return -1;
+	}
+	
+	/* name */
+	if (params->name == NULL) {
+		return -2;
+	}
+	
+	/* socket */
+	if ((params->socket < 0) || (params->socket >= RTE_MAX_NUMA_NODES)) {
+		return -3;
+	}
+	
+	/* rate */
+	if (params->rate == 0) {
+		return -4;
+	}
+	
+	/* n_subports_per_port: non-zero, power of 2 */
+	if ((params->n_subports_per_port == 0) || (!rte_is_power_of_2(params->n_subports_per_port))) {
+		return -5;
+	}
+
+	/* n_pipes_per_subport: non-zero, power of 2 */
+	if ((params->n_pipes_per_subport == 0) || (!rte_is_power_of_2(params->n_pipes_per_subport))) {
+		return -6;
+	}
+	
+	/* qsize: non-zero, power of 2, no bigger than 32K (due to 16-bit read/write pointers).
+	 * The 32K upper bound is implicit: the largest power-of-2 value a
+	 * uint16_t can hold is 32768. */
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i ++) {
+		uint16_t qsize = params->qsize[i];
+		
+		if ((qsize == 0) || (!rte_is_power_of_2(qsize))) {
+			return -7;
+		}
+	}
+	
+	/* pipe_profiles and n_pipe_profiles */
+	if ((params->pipe_profiles == NULL) || 
+	    (params->n_pipe_profiles == 0) ||
+	    (params->n_pipe_profiles > RTE_SCHED_PIPE_PROFILES_PER_PORT)) {
+		return -8;
+	}
+	
+	for (i = 0; i < params->n_pipe_profiles; i ++) {
+		struct rte_sched_pipe_params *p = params->pipe_profiles + i;
+		
+		/* TB rate: non-zero, not greater than port rate */
+		if ((p->tb_rate == 0) || (p->tb_rate > params->rate)) {
+			return -9;
+		}
+		
+		/* TB size: non-zero */
+		if (p->tb_size == 0) {
+			return -10;
+		}
+
+		/* TC rate: non-zero, not greater than pipe (TB) rate */
+		for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j ++) {
+			if ((p->tc_rate[j] == 0) || (p->tc_rate[j] > p->tb_rate)) {
+				return -11;
+			}
+		}
+		
+		/* TC period: non-zero */
+		if (p->tc_period == 0) {
+			return -12;
+		}
+
+#ifdef RTE_SCHED_SUBPORT_TC_OV
+		/* TC oversubscription weights: non-zero */
+		for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j ++) {
+			if (p->tc_ov_weight[j] == 0) {
+				return -13;
+			}
+		}
+#endif
+
+		/* Queue WRR weights: non-zero */
+		for (j = 0; j < RTE_SCHED_QUEUES_PER_PIPE; j ++) {
+			if (p->wrr_weights[j] == 0) {
+				return -14;
+			}
+		}
+	}
+	
+	return 0;
+}
+
+/**
+ * Compute the byte offset of the given array within the port's trailing
+ * memory region. The offset of array X is the sum of the cache-line
+ * aligned sizes of all arrays that precede X in enum rte_sched_port_array;
+ * e_RTE_SCHED_PORT_ARRAY_TOTAL therefore yields the total region size.
+ */
+static uint32_t
+rte_sched_port_get_array_base(struct rte_sched_port_params *params, enum rte_sched_port_array array)
+{
+	uint32_t n_subports_per_port = params->n_subports_per_port;
+	uint32_t n_pipes_per_subport = params->n_pipes_per_subport;
+	uint32_t n_pipes_per_port = n_pipes_per_subport * n_subports_per_port;
+	uint32_t n_queues_per_port = RTE_SCHED_QUEUES_PER_PIPE * n_pipes_per_subport * n_subports_per_port;
+	uint32_t size[e_RTE_SCHED_PORT_ARRAY_TOTAL];
+	uint32_t size_per_pipe_queue_array;
+	uint32_t base, i;
+
+	/* Packet pointer storage needed by one pipe (all TCs, all queues) */
+	size_per_pipe_queue_array = 0;
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i ++) {
+		size_per_pipe_queue_array += RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS * params->qsize[i] * sizeof(struct rte_mbuf *);
+	}
+
+	/* Raw (unaligned) size of each array, indexed by enum value */
+	size[e_RTE_SCHED_PORT_ARRAY_SUBPORT] = n_subports_per_port * sizeof(struct rte_sched_subport);
+	size[e_RTE_SCHED_PORT_ARRAY_PIPE] = n_pipes_per_port * sizeof(struct rte_sched_pipe);
+	size[e_RTE_SCHED_PORT_ARRAY_QUEUE] = n_queues_per_port * sizeof(struct rte_sched_queue);
+	size[e_RTE_SCHED_PORT_ARRAY_QUEUE_EXTRA] = n_queues_per_port * sizeof(struct rte_sched_queue_extra);
+	size[e_RTE_SCHED_PORT_ARRAY_PIPE_PROFILES] = RTE_SCHED_PIPE_PROFILES_PER_PORT * sizeof(struct rte_sched_pipe_profile);
+	size[e_RTE_SCHED_PORT_ARRAY_BMP_ARRAY] = n_queues_per_port / 8;
+	size[e_RTE_SCHED_PORT_ARRAY_QUEUE_ARRAY] = n_pipes_per_port * size_per_pipe_queue_array;
+
+	/* Accumulate the aligned sizes of all arrays before the requested one */
+	base = 0;
+	for (i = 0; i < (uint32_t) array; i ++) {
+		base += CACHE_LINE_ROUNDUP(size[i]);
+	}
+
+	return base;
+}
+
+/**
+ * Total memory (bytes) needed for a port with the given configuration:
+ * the fixed-size struct plus the variable-size array region.
+ * Returns 0 when the parameters fail validation.
+ */
+uint32_t
+rte_sched_port_get_memory_footprint(struct rte_sched_port_params *params)
+{
+	int status = rte_sched_port_check_params(params);
+
+	if (status != 0) {
+		RTE_LOG(INFO, SCHED, "Port scheduler params check failed (%d)\n", status);
+		return 0;
+	}
+
+	return sizeof(struct rte_sched_port) +
+		rte_sched_port_get_array_base(params, e_RTE_SCHED_PORT_ARRAY_TOTAL);
+}
+
+/**
+ * Fill in the per-queue base offsets within one pipe's packet storage.
+ * qsize_add[i] is the number of mbuf slots occupied by all queues that
+ * precede queue i; queues are laid out 4 per traffic class, so queue i
+ * has the size configured for TC (i / 4). qsize_sum is the total number
+ * of slots of one pipe.
+ */
+static void
+rte_sched_port_config_qsize(struct rte_sched_port *port)
+{
+	uint32_t i;
+
+	port->qsize_add[0] = 0;
+	for (i = 1; i < RTE_SCHED_QUEUES_PER_PIPE; i ++) {
+		/* queue i - 1 belongs to TC ((i - 1) / 4) */
+		port->qsize_add[i] = port->qsize_add[i - 1] + port->qsize[(i - 1) >> 2];
+	}
+
+	port->qsize_sum = port->qsize_add[RTE_SCHED_QUEUES_PER_PIPE - 1] +
+		port->qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE - 1];
+}
+
+/* Dump the low-level (converted) parameters of pipe profile i at INFO level */
+static void 
+rte_sched_port_log_pipe_profile(struct rte_sched_port *port, uint32_t i)
+{
+	struct rte_sched_pipe_profile *p = port->pipe_profiles + i;
+	
+	RTE_LOG(INFO, SCHED, "Low level config for pipe profile %u:\n"
+		"\tToken bucket: period = %u, credits per period = %u, size = %u\n"
+		"\tTraffic classes: period = %u, credits per period = [%u, %u, %u, %u], ov weights = [%hhu, %hhu, %hhu, %hhu]\n"
+		"\tWRR cost: [%hhu, %hhu, %hhu, %hhu], [%hhu, %hhu, %hhu, %hhu], [%hhu, %hhu, %hhu, %hhu], [%hhu, %hhu, %hhu, %hhu]\n",
+		i,
+		
+		/* Token bucket */
+		p->tb_period,
+		p->tb_credits_per_period,
+		p->tb_size,
+		
+		/* Traffic classes */
+		p->tc_period,
+		p->tc_credits_per_period[0],
+		p->tc_credits_per_period[1],
+		p->tc_credits_per_period[2],
+		p->tc_credits_per_period[3],
+		p->tc_ov_weight[0],
+		p->tc_ov_weight[1],
+		p->tc_ov_weight[2],
+		p->tc_ov_weight[3],
+		
+		/* WRR */
+		p->wrr_cost[ 0], p->wrr_cost[ 1], p->wrr_cost[ 2], p->wrr_cost[ 3],
+		p->wrr_cost[ 4], p->wrr_cost[ 5], p->wrr_cost[ 6], p->wrr_cost[ 7],
+		p->wrr_cost[ 8], p->wrr_cost[ 9], p->wrr_cost[10], p->wrr_cost[11],
+		p->wrr_cost[12], p->wrr_cost[13], p->wrr_cost[14], p->wrr_cost[15]);
+}
+
+/* Convert a duration in milliseconds to the equivalent number of bytes
+ * transmitted at the given rate (bytes/second) */
+static inline uint64_t
+rte_sched_time_ms_to_bytes(uint32_t time_ms, uint32_t rate)
+{
+	/* Widen before multiplying so time_ms * rate cannot overflow 32 bits */
+	return (((uint64_t) time_ms) * rate) / 1000;
+}
+
+/**
+ * Convert each user pipe profile (struct rte_sched_pipe_params, rates in
+ * bytes/s and periods in ms) into the internal representation
+ * (struct rte_sched_pipe_profile, credits per period in bytes).
+ */
+static void
+rte_sched_port_config_pipe_profile_table(struct rte_sched_port *port, struct rte_sched_port_params *params)
+{
+	uint32_t i, j;
+	
+	for (i = 0; i < port->n_pipe_profiles; i ++) {
+		struct rte_sched_pipe_params *src = params->pipe_profiles + i;
+		struct rte_sched_pipe_profile *dst = port->pipe_profiles + i;
+		
+		/* Token Bucket: approximate tb_rate/port_rate by the rational
+		 * tb_credits_per_period/tb_period within the configured error */
+		if (src->tb_rate == params->rate) {
+			dst->tb_credits_per_period = 1;
+			dst->tb_period = 1;
+		} else {
+			double tb_rate = ((double) src->tb_rate) / ((double) params->rate);
+			double d = RTE_SCHED_TB_RATE_CONFIG_ERR;
+			
+			rte_approx(tb_rate, d, &dst->tb_credits_per_period, &dst->tb_period);
+		}
+		dst->tb_size = src->tb_size;
+		
+		/* Traffic Classes: periods converted from ms to byte-time */
+		dst->tc_period = (uint32_t) rte_sched_time_ms_to_bytes(src->tc_period, params->rate);
+		for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j ++) {
+			dst->tc_credits_per_period[j] = (uint32_t) rte_sched_time_ms_to_bytes(src->tc_period, src->tc_rate[j]);
+		}
+#ifdef RTE_SCHED_SUBPORT_TC_OV
+		for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j ++) {
+			dst->tc_ov_weight[j] = src->tc_ov_weight[j];
+		}
+#endif
+		
+		/* WRR: per-queue cost is lcd / weight, i.e. inversely
+		 * proportional to the configured weight (rte_get_lcd() is a
+		 * helper not visible here — presumably lowest common
+		 * denominator of the two values; confirm in rte_sched_common.h) */
+		for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j ++) {
+			uint32_t wrr_cost[RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS];
+			uint32_t lcd, lcd1, lcd2;
+			uint32_t qindex;
+			
+			qindex = j * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS;
+			
+			wrr_cost[0] = src->wrr_weights[qindex];
+			wrr_cost[1] = src->wrr_weights[qindex + 1];
+			wrr_cost[2] = src->wrr_weights[qindex + 2];
+			wrr_cost[3] = src->wrr_weights[qindex + 3];
+			
+			lcd1 = rte_get_lcd(wrr_cost[0], wrr_cost[1]);
+			lcd2 = rte_get_lcd(wrr_cost[2], wrr_cost[3]);
+			lcd = rte_get_lcd(lcd1, lcd2);
+
+			wrr_cost[0] = lcd / wrr_cost[0];
+			wrr_cost[1] = lcd / wrr_cost[1];
+			wrr_cost[2] = lcd / wrr_cost[2];
+			wrr_cost[3] = lcd / wrr_cost[3];
+			
+			dst->wrr_cost[qindex] = (uint8_t) wrr_cost[0];
+			dst->wrr_cost[qindex + 1] = (uint8_t) wrr_cost[1];
+			dst->wrr_cost[qindex + 2] = (uint8_t) wrr_cost[2];
+			dst->wrr_cost[qindex + 3] = (uint8_t) wrr_cost[3];
+		}
+	
+		rte_sched_port_log_pipe_profile(port, i);
+	}
+}
+
+/*
+ * Create and initialize a scheduler port.
+ *
+ * All internal data structures (subport/pipe/queue tables, pipe profile
+ * table, queue bitmap, packet queue storage) live inside a single
+ * contiguous memzone whose internal layout is computed by
+ * rte_sched_port_get_array_base().
+ *
+ * @param params port configuration; validated via
+ *   rte_sched_port_get_memory_footprint()
+ * @return handle to the new port, or NULL on invalid parameters or
+ *   allocation failure. NOTE(review): on the later error paths below a
+ *   freshly reserved memzone is not released — memzones cannot be freed
+ *   in this DPDK version, so a failed config leaks the reservation.
+ */
+struct rte_sched_port *
+rte_sched_port_config(struct rte_sched_port_params *params)
+{
+	struct rte_sched_port *port = NULL;
+	const struct rte_memzone *mz = NULL;
+	uint32_t mem_size, i;
+	
+	/* Check user parameters. Determine the amount of memory to allocate */
+	mem_size = rte_sched_port_get_memory_footprint(params);
+	if (mem_size == 0) {
+		return NULL;
+	}
+	
+	/* Allocate memory to store the data structures */
+	mz = rte_memzone_lookup(params->name);
+	if (mz) {
+		/* Use existing memzone, provided that its size is big enough */
+		if (mz->len < mem_size) {
+			return NULL;
+		}
+	} else {
+		/* Create new memzone */
+		mz = rte_memzone_reserve(params->name, mem_size, params->socket, 0);
+		if (mz == NULL) {
+			return NULL;
+		}
+	}
+	/* Zero the whole zone so all tables start in a known-empty state */
+	memset(mz->addr, 0, mem_size);
+	port = (struct rte_sched_port *) mz->addr;
+
+	/* User parameters */
+	port->n_subports_per_port = params->n_subports_per_port;
+	port->n_pipes_per_subport = params->n_pipes_per_subport;
+	port->rate = params->rate;
+	port->frame_overhead = params->frame_overhead;
+	memcpy(port->qsize, params->qsize, sizeof(params->qsize));
+	port->n_pipe_profiles = params->n_pipe_profiles;
+
+#ifdef RTE_SCHED_RED
+	/* One RED configuration per (traffic class, packet color) pair */
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+		uint32_t j;
+		
+		for (j = 0; j < e_RTE_METER_COLORS; j++) {
+			if (rte_red_config_init(&port->red_config[i][j],
+				params->red_params[i][j].wq_log2,
+				params->red_params[i][j].min_th,
+				params->red_params[i][j].max_th,
+				params->red_params[i][j].maxp_inv) != 0) {
+				return NULL;
+			}
+		}
+	}
+#endif
+
+	/* Timing: internal time is in bytes, converted from TSC cycles */
+	port->time_cpu_cycles = rte_get_tsc_cycles();
+	port->time_cpu_bytes = 0;
+	port->time = 0;
+	port->cycles_per_byte = ((double) rte_get_tsc_hz()) / ((double) params->rate);
+
+	/* Scheduling loop detection */
+	port->pipe_loop = RTE_SCHED_PIPE_INVALID;
+	port->pipe_exhaustion = 0;
+
+	/* Grinders */
+	port->busy_grinders = 0;
+	port->pkts_out = NULL;
+	port->n_pkts_out = 0;
+	
+	/* Queue base calculation */
+	rte_sched_port_config_qsize(port);
+	
+	/* Large data structures: carve the arrays out of the single memzone */
+	port->subport = (struct rte_sched_subport *) (port->memory + rte_sched_port_get_array_base(params, e_RTE_SCHED_PORT_ARRAY_SUBPORT));
+	port->pipe = (struct rte_sched_pipe *) (port->memory + rte_sched_port_get_array_base(params, e_RTE_SCHED_PORT_ARRAY_PIPE));
+	port->queue = (struct rte_sched_queue *) (port->memory + rte_sched_port_get_array_base(params, e_RTE_SCHED_PORT_ARRAY_QUEUE));
+	port->queue_extra = (struct rte_sched_queue_extra *) (port->memory + rte_sched_port_get_array_base(params, e_RTE_SCHED_PORT_ARRAY_QUEUE_EXTRA));
+	port->pipe_profiles = (struct rte_sched_pipe_profile *) (port->memory + rte_sched_port_get_array_base(params, e_RTE_SCHED_PORT_ARRAY_PIPE_PROFILES));
+	port->bmp_array =  port->memory + rte_sched_port_get_array_base(params, e_RTE_SCHED_PORT_ARRAY_BMP_ARRAY);
+	port->queue_array = (struct rte_mbuf **) (port->memory + rte_sched_port_get_array_base(params, e_RTE_SCHED_PORT_ARRAY_QUEUE_ARRAY));
+
+	/* Pipe profile table */
+	rte_sched_port_config_pipe_profile_table(port, params);
+	
+	/* Bitmap: one bit per queue, set when the queue is non-empty */
+	if (rte_bitmap_init(&port->bmp, port->bmp_array, rte_sched_port_queues_per_port(port)) != 0) {
+		RTE_LOG(INFO, SCHED, "Bitmap init error\n");
+		return NULL;
+	}
+	for (i = 0; i < RTE_SCHED_PORT_N_GRINDERS; i ++) {
+		port->grinder_base_bmp_pos[i] = RTE_SCHED_PIPE_INVALID;
+	}
+	
+	return port;
+}
+
+/*
+ * Release the resources held by a scheduler port.
+ *
+ * Only the queue bitmap is torn down; the backing memzone itself is not
+ * released (memzones are never freed in this DPDK version). A NULL port
+ * handle is silently ignored.
+ */
+void
+rte_sched_port_free(struct rte_sched_port *port)
+{
+	if (port != NULL) {
+		rte_bitmap_free(&port->bmp);
+	}
+}
+
+/*
+ * Log the low-level (byte/credit based) configuration of subport i,
+ * as computed by rte_sched_subport_config() from the user parameters.
+ */
+static void
+rte_sched_port_log_subport_config(struct rte_sched_port *port, uint32_t i)
+{
+	struct rte_sched_subport *s = port->subport + i;
+	
+	RTE_LOG(INFO, SCHED, "Low level config for subport %u:\n"	
+		"\tToken bucket: period = %u, credits per period = %u, size = %u\n"
+		"\tTraffic classes: period = %u, credits per period = [%u, %u, %u, %u], ov period = %u\n",
+		i,
+		
+		/* Token bucket */
+		s->tb_period,
+		s->tb_credits_per_period,
+		s->tb_size,
+		
+		/* Traffic classes */
+		s->tc_period,
+		s->tc_credits_per_period[0],
+		s->tc_credits_per_period[1],
+		s->tc_credits_per_period[2],
+		s->tc_credits_per_period[3],
+		s->tc_ov_period);
+}
+
+/*
+ * Configure one subport of a scheduler port.
+ *
+ * Validates the user parameters, then converts rates (bytes/s) and
+ * periods (ms) into the internal credit/byte representation for the
+ * subport token bucket and its per-traffic-class credit counters.
+ *
+ * @return 0 on success; a distinct negative code (-1..-6) identifying
+ *   which parameter check failed.
+ */
+int
+rte_sched_subport_config(struct rte_sched_port *port, 
+	uint32_t subport_id,
+	struct rte_sched_subport_params *params)
+{
+	struct rte_sched_subport *s;
+	uint32_t i;
+	
+	/* Check user parameters */
+	if ((port == NULL) ||
+	    (subport_id >= port->n_subports_per_port) ||
+		(params == NULL)) {
+		return -1;
+	}
+	
+	/* Subport rate must be non-zero and no greater than the port rate */
+	if ((params->tb_rate == 0) || (params->tb_rate > port->rate)) {
+		return -2;
+	}
+	
+	if (params->tb_size == 0) {
+		return -3;
+	}
+	
+	/* Each TC rate must be non-zero and within the subport rate */
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i ++) {
+		if ((params->tc_rate[i] == 0) || (params->tc_rate[i] > params->tb_rate)) {
+			return -4;
+		}
+	}
+	
+	if (params->tc_period == 0) {
+		return -5;
+	}
+
+#ifdef RTE_SCHED_SUBPORT_TC_OV
+	/* Oversubscription period must fit inside the TC period */
+	if ((params->tc_ov_period == 0) || (params->tc_ov_period > params->tc_period)) {
+		return -6;
+	}
+#endif
+	
+	s = port->subport + subport_id;
+	
+	/* Token Bucket (TB): approximate tb_rate/port->rate as
+	 * tb_credits_per_period/tb_period using rte_approx() */
+	if (params->tb_rate == port->rate) {
+		s->tb_credits_per_period = 1;
+		s->tb_period = 1;
+	} else {
+		double tb_rate = ((double) params->tb_rate) / ((double) port->rate);
+		double d = RTE_SCHED_TB_RATE_CONFIG_ERR;
+		
+		rte_approx(tb_rate, d, &s->tb_credits_per_period, &s->tb_period);
+	}
+	s->tb_size = params->tb_size;
+	s->tb_time = port->time;
+	/* Start with a half-full bucket */
+	s->tb_credits = s->tb_size / 2;
+	
+	/* Traffic Classes (TCs): convert ms periods to byte-time */
+	s->tc_period = (uint32_t) rte_sched_time_ms_to_bytes(params->tc_period, port->rate);
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i ++) {
+		s->tc_credits_per_period[i] = (uint32_t) rte_sched_time_ms_to_bytes(params->tc_period, params->tc_rate[i]);
+	}
+	s->tc_time = port->time + s->tc_period;
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i ++) {
+		s->tc_credits[i] = s->tc_credits_per_period[i];
+	}
+	
+#ifdef RTE_SCHED_SUBPORT_TC_OV
+	/* TC oversubscription */
+	s->tc_ov_period = (uint32_t) rte_sched_time_ms_to_bytes(params->tc_ov_period, port->rate);
+	s->tc_ov_time = port->time + s->tc_ov_period;
+	s->tc_ov_period_id = 0;
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i ++) {
+		s->tc_ov[i] = 0;
+		s->tc_ov_n[i] = 0;
+		s->tc_ov_rate[i] = 0;
+		s->tc_ov_credits[i] = 0;
+	}
+#endif
+	
+	rte_sched_port_log_subport_config(port, subport_id);
+	
+	return 0;
+}
+
+/*
+ * Configure (or deactivate) one pipe of a subport.
+ *
+ * A negative pipe_profile deactivates the pipe. If the pipe already has
+ * a valid configuration (tb_time != 0 is used as the "configured" flag),
+ * it is first unplugged from the subport oversubscription accounting and
+ * reset before the new profile is applied.
+ *
+ * @return 0 on success, -1 on bad parameters, -2 if the subport has not
+ *   been configured yet.
+ */
+int
+rte_sched_pipe_config(struct rte_sched_port *port,
+	uint32_t subport_id, 
+	uint32_t pipe_id,
+	int32_t pipe_profile)
+{
+	struct rte_sched_subport *s;
+	struct rte_sched_pipe *p;
+	struct rte_sched_pipe_profile *params;
+	uint32_t deactivate, profile, i;
+	
+	/* Check user parameters */
+	profile = (uint32_t) pipe_profile;
+	deactivate = (pipe_profile < 0);
+	if ((port == NULL) ||
+	    (subport_id >= port->n_subports_per_port) ||
+		(pipe_id >= port->n_pipes_per_subport) ||
+		((!deactivate) && (profile >= port->n_pipe_profiles))) {
+		return -1;
+	}
+	
+	/* Check that subport configuration is valid */
+	s = port->subport + subport_id;
+	if (s->tb_period == 0) {
+		return -2;
+	}
+	
+	p = port->pipe + (subport_id * port->n_pipes_per_subport + pipe_id);
+	
+	/* Handle the case when pipe already has a valid configuration */
+	if (p->tb_time) {
+		params = port->pipe_profiles + p->profile;
+
+#ifdef RTE_SCHED_SUBPORT_TC_OV
+		/* Unplug pipe from its subport: remove its weight and rate
+		 * contribution, then re-evaluate the oversubscription flag */
+		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i ++) {
+			s->tc_ov_n[i] -= params->tc_ov_weight[i];
+			s->tc_ov_rate[i] -= ((double) params->tc_credits_per_period[i]) / ((double) params->tc_period);
+			s->tc_ov[i] = s->tc_ov_rate[i] > (((double) s->tc_credits_per_period[i]) / ((double) s->tc_period));
+		}
+#endif
+		
+		/* Reset the pipe */
+		memset(p, 0, sizeof(struct rte_sched_pipe));
+	}
+	
+	if (deactivate) {
+		return 0;
+	}
+	
+	/* Apply the new pipe configuration */
+	p->profile = profile;
+	params = port->pipe_profiles + p->profile;
+
+	/* Token Bucket (TB): start with a half-full bucket */
+	p->tb_time = port->time;
+	p->tb_credits = params->tb_size / 2;
+	
+	/* Traffic Classes (TCs) */
+	p->tc_time = port->time + params->tc_period;
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i ++) {
+		p->tc_credits[i] = params->tc_credits_per_period[i];
+	}
+	
+#ifdef RTE_SCHED_SUBPORT_TC_OV
+	/* Subport TC oversubscription: add this pipe's contribution and
+	 * re-evaluate whether each TC is oversubscribed */
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i ++) {
+		s->tc_ov_n[i] += params->tc_ov_weight[i];
+		s->tc_ov_rate[i] += ((double) params->tc_credits_per_period[i]) / ((double) params->tc_period);
+		s->tc_ov[i] = s->tc_ov_rate[i] > (((double) s->tc_credits_per_period[i]) / ((double) s->tc_period));
+	}
+	p->tc_ov_period_id = s->tc_ov_period_id;
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i ++) {
+		p->tc_ov_credits[i] = 0;
+	}
+#endif
+	
+	return 0;
+}
+
+/*
+ * Read-and-clear the counters of one subport.
+ *
+ * @param stats [out] receives a copy of the subport counters, which are
+ *   then reset to zero
+ * @param tc_ov [out] bitmask with one bit per traffic class, set when
+ *   that TC is currently oversubscribed
+ * @return 0 on success, -1 on bad parameters
+ */
+int
+rte_sched_subport_read_stats(struct rte_sched_port *port,
+	uint32_t subport_id,
+	struct rte_sched_subport_stats *stats,
+	uint32_t *tc_ov)
+{
+	struct rte_sched_subport *s;
+	uint32_t mask, i;
+	
+	/* Check user parameters */
+	if ((port == NULL) ||
+	    (subport_id >= port->n_subports_per_port) ||
+		(stats == NULL) ||
+		(tc_ov == NULL)) {
+		return -1;
+	}
+	s = port->subport + subport_id;
+
+	/* Copy subport stats and clear */
+	memcpy(stats, &s->stats, sizeof(struct rte_sched_subport_stats));
+	memset(&s->stats, 0, sizeof(struct rte_sched_subport_stats));
+	
+	/* Subport TC ovesubscription status */
+	mask = 0;
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i ++) {
+		mask |= ((uint32_t) s->tc_ov[i]) << i;
+	}
+	*tc_ov = mask;
+	
+	return 0;
+}
+
+/*
+ * Read-and-clear the counters of one queue.
+ *
+ * @param stats [out] receives a copy of the queue counters, which are
+ *   then reset to zero
+ * @param qlen [out] current queue occupancy (write minus read position;
+ *   unsigned 16-bit wrap-around arithmetic keeps this correct)
+ * @return 0 on success, -1 on bad parameters
+ */
+int
+rte_sched_queue_read_stats(struct rte_sched_port *port,
+	uint32_t queue_id,
+	struct rte_sched_queue_stats *stats,
+	uint16_t *qlen)
+{
+	struct rte_sched_queue *q;
+	struct rte_sched_queue_extra *qe;
+	
+	/* Check user parameters */
+	if ((port == NULL) ||
+	    (queue_id >= rte_sched_port_queues_per_port(port)) ||
+		(stats == NULL) ||
+		(qlen == NULL)) {
+		return -1;
+	}
+	q = port->queue + queue_id;
+	qe = port->queue_extra + queue_id;
+
+	/* Copy queue stats and clear */
+	memcpy(stats, &qe->stats, sizeof(struct rte_sched_queue_stats));
+	memset(&qe->stats, 0, sizeof(struct rte_sched_queue_stats));
+	
+	/* Queue length */
+	*qlen = q->qw - q->qr;
+	
+	return 0;
+}
+
+/*
+ * Flatten a (subport, pipe, traffic_class, queue) tree path into the
+ * linear queue index used by all per-queue arrays.
+ */
+static inline uint32_t
+rte_sched_port_qindex(struct rte_sched_port *port, uint32_t subport, uint32_t pipe, uint32_t traffic_class, uint32_t queue)
+{
+	uint32_t pipe_index = subport * port->n_pipes_per_subport + pipe;
+	uint32_t tc_index = pipe_index * RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE + traffic_class;
+
+	return tc_index * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + queue;
+}
+
+/*
+ * Return the base of the mbuf storage for queue qindex. With 16 queues
+ * per pipe, the upper bits of qindex select the pipe and the low 4 bits
+ * select the queue within the pipe; qsize_add[] holds the per-queue
+ * offsets inside one pipe's storage slab.
+ */
+static inline struct rte_mbuf **
+rte_sched_port_qbase(struct rte_sched_port *port, uint32_t qindex)
+{
+	uint32_t pipe_index = qindex >> 4;
+	uint32_t queue_pos = qindex & 0xF;
+
+	return port->queue_array + pipe_index * port->qsize_sum + port->qsize_add[queue_pos];
+}
+
+/*
+ * Return the size (in packets) of queue qindex. All queues of the same
+ * traffic class share one configured size; bits [3:2] of qindex encode
+ * the traffic class.
+ */
+static inline uint16_t
+rte_sched_port_qsize(struct rte_sched_port *port, uint32_t qindex)
+{
+	return port->qsize[(qindex >> 2) & 0x3];
+}
+
+#if RTE_SCHED_DEBUG
+
+/* Debug helper: non-zero when queue qindex holds no packets */
+static inline int
+rte_sched_port_queue_is_empty(struct rte_sched_port *port, uint32_t qindex)
+{
+	struct rte_sched_queue *queue = port->queue + qindex;
+	
+	return (queue->qr == queue->qw);
+}
+
+/*
+ * Debug helper: non-zero when queue qindex is full.
+ *
+ * Fix: the original computed qlen as "q->qw - q->qr", but no variable
+ * named q exists in this scope (the local is "queue"), so the file
+ * failed to compile whenever RTE_SCHED_DEBUG was enabled.
+ */
+static inline int
+rte_sched_port_queue_is_full(struct rte_sched_port *port, uint32_t qindex)
+{
+	struct rte_sched_queue *queue = port->queue + qindex;
+	uint16_t qsize = rte_sched_port_qsize(port, qindex);
+	/* Wrap-around-safe occupancy in 16-bit unsigned arithmetic */
+	uint16_t qlen = queue->qw - queue->qr;
+	
+	return (qlen >= qsize);
+}
+
+#endif /* RTE_SCHED_DEBUG */
+
+#ifdef RTE_SCHED_COLLECT_STATS
+
+/* Account one successfully enqueued packet in its subport's per-TC
+ * counters; the TC is encoded in bits [3:2] of the queue index. */
+static inline void
+rte_sched_port_update_subport_stats(struct rte_sched_port *port, uint32_t qindex, struct rte_mbuf *pkt)
+{
+	struct rte_sched_subport *s = port->subport + (qindex / rte_sched_port_queues_per_subport(port));
+	uint32_t tc_index = (qindex >> 2) & 0x3;
+	uint32_t pkt_len = pkt->pkt.pkt_len;
+	
+	s->stats.n_pkts_tc[tc_index] += 1;
+	s->stats.n_bytes_tc[tc_index] += pkt_len;
+}
+
+/* Account one dropped packet in its subport's per-TC drop counters.
+ * Reads pkt->pkt.pkt_len, so the caller must invoke this BEFORE
+ * freeing the mbuf. */
+static inline void
+rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port, uint32_t qindex, struct rte_mbuf *pkt)
+{
+	struct rte_sched_subport *s = port->subport + (qindex / rte_sched_port_queues_per_subport(port));
+	uint32_t tc_index = (qindex >> 2) & 0x3;
+	uint32_t pkt_len = pkt->pkt.pkt_len;
+	
+	s->stats.n_pkts_tc_dropped[tc_index] += 1;
+	s->stats.n_bytes_tc_dropped[tc_index] += pkt_len;
+}
+
+/* Account one successfully enqueued packet in its queue's counters */
+static inline void
+rte_sched_port_update_queue_stats(struct rte_sched_port *port, uint32_t qindex, struct rte_mbuf *pkt)
+{
+	struct rte_sched_queue_extra *qe = port->queue_extra + qindex;
+	uint32_t pkt_len = pkt->pkt.pkt_len;
+	
+	qe->stats.n_pkts += 1;
+	qe->stats.n_bytes += pkt_len;
+}
+
+/* Account one dropped packet in its queue's drop counters. Reads
+ * pkt->pkt.pkt_len, so the caller must invoke this BEFORE freeing
+ * the mbuf. */
+static inline void
+rte_sched_port_update_queue_stats_on_drop(struct rte_sched_port *port, uint32_t qindex, struct rte_mbuf *pkt)
+{
+	struct rte_sched_queue_extra *qe = port->queue_extra + qindex;
+	uint32_t pkt_len = pkt->pkt.pkt_len;
+	
+	qe->stats.n_pkts_dropped += 1;
+	qe->stats.n_bytes_dropped += pkt_len;
+}
+
+#endif /* RTE_SCHED_COLLECT_STATS */
+
+#ifdef RTE_SCHED_RED
+
+/*
+ * Ask RED whether the packet should be dropped before enqueue.
+ * The RED configuration is selected by (traffic class, packet color);
+ * the per-queue RED state lives in the queue's extra data.
+ *
+ * @return non-zero when RED decides to drop the packet
+ */
+static inline int
+rte_sched_port_red_drop(struct rte_sched_port *port, struct rte_mbuf *pkt, uint32_t qindex, uint16_t qlen)
+{
+	struct rte_sched_queue_extra *qe;
+	struct rte_red_config *red_cfg;
+    struct rte_red *red;
+	uint32_t tc_index;
+	enum rte_meter_color color;
+	
+	tc_index = (qindex >> 2) & 0x3;
+	color = rte_sched_port_pkt_read_color(pkt);
+	red_cfg = &port->red_config[tc_index][color];
+
+	qe = port->queue_extra + qindex;
+	red = &qe->red;
+
+	return rte_red_enqueue(red_cfg, red, qlen, port->time);
+}
+
+/* Record in the queue's RED state the time at which the queue became
+ * empty, so the average queue size decays correctly while idle. */
+static inline void
+rte_sched_port_set_queue_empty_timestamp(struct rte_sched_port *port, uint32_t qindex)
+{
+	struct rte_sched_queue_extra *qe;
+    struct rte_red *red;
+	
+	qe = port->queue_extra + qindex;
+	red = &qe->red;
+
+	rte_red_mark_queue_empty(red, port->time);
+}
+
+#else
+
+/* RED disabled at build time: never drop on enqueue, and empty-queue
+ * timestamping compiles to nothing */
+#define rte_sched_port_red_drop(port, pkt, qindex, qlen)             0
+
+#define rte_sched_port_set_queue_empty_timestamp(port, qindex)
+
+#endif /* RTE_SCHED_RED */
+
+#if RTE_SCHED_DEBUG
+
+/*
+ * Debug helper: verify that each of the 16 queues of pipe pindex agrees
+ * with its bit in the port bitmap (panic on mismatch), and return
+ * non-zero when all 16 queues are empty.
+ */
+static inline int
+debug_pipe_is_empty(struct rte_sched_port *port, uint32_t pindex)
+{
+	uint32_t qindex, i;
+
+	/* 16 queues per pipe: first queue index of this pipe */
+	qindex = pindex << 4;
+	
+	for (i = 0; i < 16; i ++){
+		uint32_t queue_empty = rte_sched_port_queue_is_empty(port, qindex + i);
+		uint32_t bmp_bit_clear = (rte_bitmap_get(&port->bmp, qindex + i) == 0);
+		
+		if (queue_empty != bmp_bit_clear){
+			rte_panic("Queue status mismatch for queue %u of pipe %u\n", i, pindex);
+		}
+		
+		if (!queue_empty){
+			return 0;
+		}
+	}
+	
+	return 1;
+}
+
+/*
+ * Debug helper: verify that every bit set in a 64-queue bitmap slab
+ * corresponds to a non-empty queue; panic when the invariant is broken.
+ *
+ * Fix: the panic format string was missing the space between the slab
+ * value and "starting", producing fused output like "0x3starting at".
+ */
+static inline void
+debug_check_queue_slab(struct rte_sched_port *port, uint32_t bmp_pos, uint64_t bmp_slab)
+{
+	uint64_t mask;
+	uint32_t i, panic;
+	
+	if (bmp_slab == 0){
+		rte_panic("Empty slab at position %u\n", bmp_pos);
+	}
+	
+	/* Report every offending queue before panicking */
+	panic = 0;
+	for (i = 0, mask = 1; i < 64; i ++, mask <<= 1) {
+		if (mask & bmp_slab){
+			if (rte_sched_port_queue_is_empty(port, bmp_pos + i)) {
+				printf("Queue %u (slab offset %u) is empty\n", bmp_pos + i, i);
+				panic = 1;
+			}
+		}
+	}
+	
+	if (panic){
+		rte_panic("Empty queues in slab 0x%" PRIx64 " starting at position %u\n",
+			bmp_slab, bmp_pos);
+	}
+}
+
+#endif /* RTE_SCHED_DEBUG */
+
+/*
+ * Enqueue pipeline stage 1 helper: translate the packet's scheduling
+ * tree path (stored in the mbuf by the classifier) into a queue index,
+ * and prefetch the queue structure (and stats, when enabled) so they
+ * are cache-resident by the time the later stages touch them.
+ *
+ * @return the packet's linear queue index
+ */
+static inline uint32_t
+rte_sched_port_enqueue_qptrs_prefetch0(struct rte_sched_port *port, struct rte_mbuf *pkt)
+{
+	struct rte_sched_queue *q;
+#ifdef RTE_SCHED_COLLECT_STATS
+	struct rte_sched_queue_extra *qe;
+#endif
+	uint32_t subport, pipe, traffic_class, queue, qindex;
+
+	rte_sched_port_pkt_read_tree_path(pkt, &subport, &pipe, &traffic_class, &queue);
+	
+	qindex = rte_sched_port_qindex(port, subport, pipe, traffic_class, queue);
+	q = port->queue + qindex;
+	rte_prefetch0(q);
+#ifdef RTE_SCHED_COLLECT_STATS
+	qe = port->queue_extra + qindex;
+	rte_prefetch0(qe);
+#endif
+	
+	return qindex;
+}
+
+/*
+ * Enqueue pipeline stage 2 helper: prefetch the exact array slot the
+ * next packet will be written to (queue write position, masked by the
+ * power-of-2 queue size) and the queue's bitmap cache line.
+ */
+static inline void
+rte_sched_port_enqueue_qwa_prefetch0(struct rte_sched_port *port, uint32_t qindex, struct rte_mbuf **qbase)
+{	
+	struct rte_sched_queue *q;
+	struct rte_mbuf **q_qw;
+	uint16_t qsize; 
+	
+	q = port->queue + qindex;
+	qsize = rte_sched_port_qsize(port, qindex);
+	q_qw = qbase + (q->qw & (qsize - 1));
+	
+	rte_prefetch0(q_qw);
+	rte_bitmap_prefetch0(&port->bmp, qindex);
+}
+
+static inline int
+rte_sched_port_enqueue_qwa(struct rte_sched_port *port, uint32_t qindex, struct rte_mbuf **qbase, struct rte_mbuf *pkt)
+{
+       struct rte_sched_queue *q;
+       uint16_t qsize;
+       uint16_t qlen;
+
+       q = port->queue + qindex;
+       qsize = rte_sched_port_qsize(port, qindex);
+       qlen = q->qw - q->qr;
+
+       /* Drop the packet (and update drop stats) when queue is full */
+       if (unlikely(rte_sched_port_red_drop(port, pkt, qindex, qlen) || (qlen >= qsize))) {
+               rte_pktmbuf_free(pkt);
+#ifdef RTE_SCHED_COLLECT_STATS
+               rte_sched_port_update_subport_stats_on_drop(port, qindex, pkt);
+               rte_sched_port_update_queue_stats_on_drop(port, qindex, pkt);
+#endif
+               return 0;
+       }
+       
+       /* Enqueue packet */
+       qbase[q->qw & (qsize - 1)] = pkt;
+       q->qw ++;
+       
+       /* Activate queue in the port bitmap */
+       rte_bitmap_set(&port->bmp, qindex);
+       
+       /* Statistics */
+#ifdef RTE_SCHED_COLLECT_STATS
+       rte_sched_port_update_subport_stats(port, qindex, pkt);
+       rte_sched_port_update_queue_stats(port, qindex, pkt);
+#endif
+
+       return 1;
+}
+
+#if RTE_SCHED_ENQUEUE == 0
+
+/*
+ * Simple (non-pipelined) enqueue: for each packet, map its scheduling
+ * tree path to a queue and write it there.
+ *
+ * @return number of packets actually enqueued (dropped packets are
+ *   freed internally and not counted)
+ */
+int
+rte_sched_port_enqueue(struct rte_sched_port *port, struct rte_mbuf **pkts, uint32_t n_pkts)
+{
+	uint32_t n_enqueued = 0;
+	uint32_t i;
+
+	for (i = 0; i < n_pkts; i++) {
+		struct rte_mbuf *pkt = pkts[i];
+		struct rte_mbuf **qbase;
+		uint32_t subport, pipe, traffic_class, queue;
+		uint32_t qindex;
+
+		rte_sched_port_pkt_read_tree_path(pkt, &subport, &pipe, &traffic_class, &queue);
+		qindex = rte_sched_port_qindex(port, subport, pipe, traffic_class, queue);
+		qbase = rte_sched_port_qbase(port, qindex);
+
+		n_enqueued += rte_sched_port_enqueue_qwa(port, qindex, qbase, pkt);
+	}
+
+	return n_enqueued;
+}
+
+#else
+
+/* The enqueue function implements a 4-level pipeline with each stage processing 
+ * two different packets. The purpose of using a pipeline is to hide the latency 
+ * of prefetching the data structures. The naming convention is presented in the
+ * diagram below:
+ * 
+ *   p00  _______   p10  _______   p20  _______   p30  _______       
+ * ----->|       |----->|       |----->|       |----->|       |----->
+ *       |   0   |      |   1   |      |   2   |      |   3   |      
+ * ----->|_______|----->|_______|----->|_______|----->|_______|----->
+ *   p01            p11            p21            p31                
+ *
+ ***/
+/*
+ * Pipelined enqueue (see diagram above): 4 stages, 2 packets per stage,
+ * so that each stage works on data prefetched by the previous one.
+ * Stage numbering in the variable names: q2x/pkt2x are two stages ahead
+ * of q0x/pkt0x. Returns the number of packets actually enqueued.
+ */
+int
+rte_sched_port_enqueue(struct rte_sched_port *port, struct rte_mbuf **pkts, uint32_t n_pkts)
+{
+	struct rte_mbuf *pkt00, *pkt01, *pkt10, *pkt11, *pkt20, *pkt21, *pkt30, *pkt31, *pkt_last;
+	struct rte_mbuf **q00_base, **q01_base, **q10_base, **q11_base, **q20_base, **q21_base, **q30_base, **q31_base, **q_last_base;
+	uint32_t q00, q01, q10, q11, q20, q21, q30, q31, q_last;
+	uint32_t r00, r01, r10, r11, r20, r21, r30, r31, r_last;
+	uint32_t result, i;
+	
+	result = 0;
+	
+	/* Fewer than 6 input packets available, which is not enough to feed the pipeline */
+	if (unlikely(n_pkts < 6)) {
+		struct rte_mbuf **q_base[5];
+		uint32_t q[5];
+		
+		/* Prefetch the mbuf structure of each packet */
+		for (i = 0; i < n_pkts; i ++) {
+			rte_prefetch0(pkts[i]);
+		}
+		
+		/* Prefetch the queue structure for each queue */
+		for (i = 0; i < n_pkts; i ++) {
+			q[i] = rte_sched_port_enqueue_qptrs_prefetch0(port, pkts[i]);
+		}
+		
+		/* Prefetch the write pointer location of each queue */
+		for (i = 0; i < n_pkts; i ++) {
+			q_base[i] = rte_sched_port_qbase(port, q[i]);
+			rte_sched_port_enqueue_qwa_prefetch0(port, q[i], q_base[i]);
+		}
+		
+		/* Write each packet to its queue */
+		for (i = 0; i < n_pkts; i ++) {
+			result += rte_sched_port_enqueue_qwa(port, q[i], q_base[i], pkts[i]);
+		}
+		
+		return result;
+	}
+	
+	/* Feed the first 3 stages of the pipeline (6 packets needed) */
+	pkt20 = pkts[0];
+	pkt21 = pkts[1];
+	rte_prefetch0(pkt20);
+	rte_prefetch0(pkt21);
+	
+	pkt10 = pkts[2];
+	pkt11 = pkts[3];
+	rte_prefetch0(pkt10);
+	rte_prefetch0(pkt11);
+
+	q20 = rte_sched_port_enqueue_qptrs_prefetch0(port, pkt20);
+	q21 = rte_sched_port_enqueue_qptrs_prefetch0(port, pkt21);
+
+	pkt00 = pkts[4];
+	pkt01 = pkts[5];
+	rte_prefetch0(pkt00);
+	rte_prefetch0(pkt01);
+	
+	q10 = rte_sched_port_enqueue_qptrs_prefetch0(port, pkt10);
+	q11 = rte_sched_port_enqueue_qptrs_prefetch0(port, pkt11);
+
+	q20_base = rte_sched_port_qbase(port, q20);
+	q21_base = rte_sched_port_qbase(port, q21);	
+	rte_sched_port_enqueue_qwa_prefetch0(port, q20, q20_base);
+	rte_sched_port_enqueue_qwa_prefetch0(port, q21, q21_base);
+	
+	/* Run the pipeline: 2 packets per iteration, even count only;
+	 * the odd tail packet (if any) is handled in the drain phase */
+	for (i = 6; i < (n_pkts & (~1)); i += 2) {	
+		/* Propagate stage inputs */
+		pkt30 = pkt20;
+		pkt31 = pkt21;
+		pkt20 = pkt10;
+		pkt21 = pkt11;
+		pkt10 = pkt00;
+		pkt11 = pkt01;
+		q30 = q20;
+		q31 = q21;
+		q20 = q10;
+		q21 = q11;
+		q30_base = q20_base;
+		q31_base = q21_base;
+		
+		/* Stage 0: Get packets in */
+		pkt00 = pkts[i];
+		pkt01 = pkts[i + 1];
+		rte_prefetch0(pkt00);
+		rte_prefetch0(pkt01);
+		
+		/* Stage 1: Prefetch queue structure storing queue pointers */
+		q10 = rte_sched_port_enqueue_qptrs_prefetch0(port, pkt10);
+		q11 = rte_sched_port_enqueue_qptrs_prefetch0(port, pkt11);
+		
+		/* Stage 2: Prefetch queue write location */
+		q20_base = rte_sched_port_qbase(port, q20);
+		q21_base = rte_sched_port_qbase(port, q21);
+		rte_sched_port_enqueue_qwa_prefetch0(port, q20, q20_base);
+		rte_sched_port_enqueue_qwa_prefetch0(port, q21, q21_base);
+		
+		/* Stage 3: Write packet to queue and activate queue */
+		r30 = rte_sched_port_enqueue_qwa(port, q30, q30_base, pkt30);
+		r31 = rte_sched_port_enqueue_qwa(port, q31, q31_base, pkt31);
+		result += r30 + r31;
+	}
+	
+	/* Drain the pipeline (exactly 6 packets). Handle the last packet in the case
+	of an odd number of input packets. */
+	pkt_last = pkts[n_pkts - 1];
+	rte_prefetch0(pkt_last);
+	
+	q00 = rte_sched_port_enqueue_qptrs_prefetch0(port, pkt00);
+	q01 = rte_sched_port_enqueue_qptrs_prefetch0(port, pkt01);
+
+	q10_base = rte_sched_port_qbase(port, q10);
+	q11_base = rte_sched_port_qbase(port, q11);
+	rte_sched_port_enqueue_qwa_prefetch0(port, q10, q10_base);
+	rte_sched_port_enqueue_qwa_prefetch0(port, q11, q11_base);
+		
+	r20 = rte_sched_port_enqueue_qwa(port, q20, q20_base, pkt20);
+	r21 = rte_sched_port_enqueue_qwa(port, q21, q21_base, pkt21);
+	result += r20 + r21;
+	
+	q_last = rte_sched_port_enqueue_qptrs_prefetch0(port, pkt_last);
+
+	q00_base = rte_sched_port_qbase(port, q00);
+	q01_base = rte_sched_port_qbase(port, q01);
+	rte_sched_port_enqueue_qwa_prefetch0(port, q00, q00_base);
+	rte_sched_port_enqueue_qwa_prefetch0(port, q01, q01_base);
+	
+	r10 = rte_sched_port_enqueue_qwa(port, q10, q10_base, pkt10);
+	r11 = rte_sched_port_enqueue_qwa(port, q11, q11_base, pkt11);
+	result += r10 + r11;
+
+	q_last_base = rte_sched_port_qbase(port, q_last);
+	rte_sched_port_enqueue_qwa_prefetch0(port, q_last, q_last_base);
+
+	r00 = rte_sched_port_enqueue_qwa(port, q00, q00_base, pkt00);
+	r01 = rte_sched_port_enqueue_qwa(port, q01, q01_base, pkt01);
+	result += r00 + r01;
+
+	/* Odd packet count: the tail packet was never fed to the pipeline,
+	 * enqueue it directly */
+	if (n_pkts & 1) {
+		r_last = rte_sched_port_enqueue_qwa(port, q_last, q_last_base, pkt_last);
+		result += r_last;
+	}
+	
+	return result;
+}
+
+#endif /* RTE_SCHED_ENQUEUE */
+
+#if RTE_SCHED_TS_CREDITS_UPDATE == 0
+
+/* Credits update disabled at build time: compile to a no-op */
+#define grinder_credits_update(port, pos)
+
+#elif !defined(RTE_SCHED_SUBPORT_TC_OV)
+
+/*
+ * Refill the credit counters used by grinder "pos" (variant without TC
+ * oversubscription support).
+ *
+ * Token buckets accumulate tb_credits_per_period credits every
+ * tb_period bytes of port time, capped at the bucket size. Traffic
+ * class credits are not accumulated: they are reset to their per-period
+ * value once the current TC period expires.
+ */
+static inline void
+grinder_credits_update(struct rte_sched_port *port, uint32_t pos)
+{
+	struct rte_sched_grinder *grinder = port->grinder + pos;
+	struct rte_sched_subport *subport = grinder->subport;
+	struct rte_sched_pipe *pipe = grinder->pipe;
+	struct rte_sched_pipe_profile *params = grinder->pipe_params;
+	uint64_t n_periods;
+	
+	/* Subport TB */
+	n_periods = (port->time - subport->tb_time) / subport->tb_period;
+	subport->tb_credits += n_periods * subport->tb_credits_per_period;
+	subport->tb_credits = rte_sched_min_val_2_u32(subport->tb_credits, subport->tb_size);
+	subport->tb_time += n_periods * subport->tb_period;
+	
+	/* Pipe TB */
+	n_periods = (port->time - pipe->tb_time) / params->tb_period;
+	pipe->tb_credits += n_periods * params->tb_credits_per_period;
+	pipe->tb_credits = rte_sched_min_val_2_u32(pipe->tb_credits, params->tb_size);
+	pipe->tb_time += n_periods * params->tb_period;
+
+	/* Subport TCs: reset (not accumulate) on period expiry */
+	if (unlikely(port->time >= subport->tc_time)) {
+		subport->tc_credits[0] = subport->tc_credits_per_period[0];
+		subport->tc_credits[1] = subport->tc_credits_per_period[1];
+		subport->tc_credits[2] = subport->tc_credits_per_period[2];
+		subport->tc_credits[3] = subport->tc_credits_per_period[3];
+		subport->tc_time = port->time + subport->tc_period;
+	}
+	
+	/* Pipe TCs */
+	if (unlikely(port->time >= pipe->tc_time)) {
+		pipe->tc_credits[0] = params->tc_credits_per_period[0];
+		pipe->tc_credits[1] = params->tc_credits_per_period[1];
+		pipe->tc_credits[2] = params->tc_credits_per_period[2];
+		pipe->tc_credits[3] = params->tc_credits_per_period[3];
+		pipe->tc_time = port->time + params->tc_period;
+	}
+}
+
+#else
+
+/*
+ * Refill the credit counters used by grinder "pos" (variant with TC
+ * oversubscription support).
+ *
+ * In addition to the token buckets and TC credits, this variant divides
+ * the subport's remaining TC credits fairly among the active pipes
+ * (weighted by tc_ov_weight) once per oversubscription period, and
+ * publishes the result to pipes via tc_ov_period_id versioning.
+ */
+static inline void
+grinder_credits_update(struct rte_sched_port *port, uint32_t pos)
+{
+	struct rte_sched_grinder *grinder = port->grinder + pos;
+	struct rte_sched_subport *subport = grinder->subport;
+	struct rte_sched_pipe *pipe = grinder->pipe;
+	struct rte_sched_pipe_profile *params = grinder->pipe_params;
+	uint64_t n_periods;
+	
+	/* Subport TB */
+	n_periods = (port->time - subport->tb_time) / subport->tb_period;
+	subport->tb_credits += n_periods * subport->tb_credits_per_period;
+	subport->tb_credits = rte_sched_min_val_2_u32(subport->tb_credits, subport->tb_size);
+	subport->tb_time += n_periods * subport->tb_period;
+	
+	/* Pipe TB */
+	n_periods = (port->time - pipe->tb_time) / params->tb_period;
+	pipe->tb_credits += n_periods * params->tb_credits_per_period;
+	pipe->tb_credits = rte_sched_min_val_2_u32(pipe->tb_credits, params->tb_size);
+	pipe->tb_time += n_periods * params->tb_period;
+
+	/* Subport TCs */
+	if (unlikely(port->time >= subport->tc_ov_time)) {
+		uint64_t n_ov_periods;
+		
+		if (unlikely(port->time >= subport->tc_time)) {
+			subport->tc_credits[0] = subport->tc_credits_per_period[0];
+			subport->tc_credits[1] = subport->tc_credits_per_period[1];
+			subport->tc_credits[2] = subport->tc_credits_per_period[2];
+			subport->tc_credits[3] = subport->tc_credits_per_period[3];
+			
+			subport->tc_time = port->time + subport->tc_period;
+		}
+		
+		/* Number of OV periods left in the current TC period
+		 * (ceiling division) */
+		n_ov_periods = (subport->tc_time - port->time + subport->tc_ov_period - 1) / subport->tc_ov_period;
+		
+		/* Per-weight-unit credit share for each TC.
+		 * NOTE(review): divides by tc_ov_n[i] — looks like a divide
+		 * by zero when no active pipe carries weight for TC i;
+		 * verify the callers guarantee tc_ov_n[i] > 0 here. */
+		subport->tc_ov_credits[0] = subport->tc_credits[0] / (n_ov_periods * subport->tc_ov_n[0]);
+		subport->tc_ov_credits[1] = subport->tc_credits[1] / (n_ov_periods * subport->tc_ov_n[1]);
+		subport->tc_ov_credits[2] = subport->tc_credits[2] / (n_ov_periods * subport->tc_ov_n[2]);
+		subport->tc_ov_credits[3] = subport->tc_credits[3] / (n_ov_periods * subport->tc_ov_n[3]);
+		
+		subport->tc_ov_time = port->time + subport->tc_ov_period;
+		subport->tc_ov_period_id ++;
+	}
+	
+	/* Pipe TCs */
+	if (unlikely(port->time >= pipe->tc_time)) {
+		pipe->tc_credits[0] = params->tc_credits_per_period[0];
+		pipe->tc_credits[1] = params->tc_credits_per_period[1];
+		pipe->tc_credits[2] = params->tc_credits_per_period[2];
+		pipe->tc_credits[3] = params->tc_credits_per_period[3];
+		pipe->tc_time = port->time + params->tc_period;
+	}
+	/* New OV period published by the subport: recompute this pipe's
+	 * OV credits. Branchless select per TC: when the TC is NOT
+	 * oversubscribed (mask all-ones) use the pipe's own TC credits,
+	 * otherwise use its weighted share of the subport OV credits. */
+	if (unlikely(pipe->tc_ov_period_id != subport->tc_ov_period_id)) {
+		uint32_t pipe_tc_ov_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+		uint32_t tc_mask[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+		uint32_t mask[] = {UINT32_MAX, 0};
+		
+		tc_mask[0] = mask[subport->tc_ov[0]];
+		tc_mask[1] = mask[subport->tc_ov[1]];
+		tc_mask[2] = mask[subport->tc_ov[2]];
+		tc_mask[3] = mask[subport->tc_ov[3]];
+		
+		pipe_tc_ov_credits[0] = subport->tc_ov_credits[0] * params->tc_ov_weight[0];
+		pipe_tc_ov_credits[1] = subport->tc_ov_credits[1] * params->tc_ov_weight[1];
+		pipe_tc_ov_credits[2] = subport->tc_ov_credits[2] * params->tc_ov_weight[2];
+		pipe_tc_ov_credits[3] = subport->tc_ov_credits[3] * params->tc_ov_weight[3];
+		
+		pipe->tc_ov_credits[0] = (tc_mask[0] & pipe->tc_credits[0]) | ((~ tc_mask[0]) & pipe_tc_ov_credits[0]);
+		pipe->tc_ov_credits[1] = (tc_mask[1] & pipe->tc_credits[1]) | ((~ tc_mask[1]) & pipe_tc_ov_credits[1]);
+		pipe->tc_ov_credits[2] = (tc_mask[2] & pipe->tc_credits[2]) | ((~ tc_mask[2]) & pipe_tc_ov_credits[2]);
+		pipe->tc_ov_credits[3] = (tc_mask[3] & pipe->tc_credits[3]) | ((~ tc_mask[3]) & pipe_tc_ov_credits[3]);
+		
+		pipe->tc_ov_period_id = subport->tc_ov_period_id;
+	}
+}
+
+#endif /* RTE_SCHED_TS_CREDITS_UPDATE, RTE_SCHED_SUBPORT_TC_OV */
+
+#ifndef RTE_SCHED_SUBPORT_TC_OV
+
+/* Credit check for the build without subport TC oversubscription
+ * (RTE_SCHED_SUBPORT_TC_OV not defined). Verify that the grinder's
+ * current packet (frame overhead included) fits within the token bucket
+ * (tb) and traffic class (tc) credits of both its subport and its pipe.
+ * On success all four credit counters are debited by the framed packet
+ * length and 1 is returned; on failure nothing is consumed and 0 is
+ * returned. */
+static inline int
+grinder_credits_check(struct rte_sched_port *port, uint32_t pos)
+{
+       struct rte_sched_grinder *grinder = port->grinder + pos;
+       struct rte_sched_subport *subport = grinder->subport;
+       struct rte_sched_pipe *pipe = grinder->pipe;
+       struct rte_mbuf *pkt = grinder->pkt;
+       uint32_t tc_index = grinder->tc_index;
+       uint32_t pkt_len = pkt->pkt.pkt_len + port->frame_overhead;
+       int enough_credits;
+
+       /* Check subport and pipe credits (token bucket and per-TC) */
+       enough_credits = (pkt_len <= subport->tb_credits) &&
+               (pkt_len <= subport->tc_credits[tc_index]) &&
+               (pkt_len <= pipe->tb_credits) &&
+               (pkt_len <= pipe->tc_credits[tc_index]);
+       
+       if (!enough_credits) {
+               return 0;
+       }
+       
+       /* Consume subport and pipe credits */
+       subport->tb_credits -= pkt_len;
+       subport->tc_credits[tc_index] -= pkt_len;
+       pipe->tb_credits -= pkt_len;
+       pipe->tc_credits[tc_index] -= pkt_len;
+
+       return 1;
+}
+
+#else
+
+/* Credit check for the build with subport TC oversubscription
+ * (RTE_SCHED_SUBPORT_TC_OV defined). Same as the variant above, with
+ * one extra constraint: the packet must also fit within the pipe's
+ * oversubscription credits (tc_ov_credits) for its traffic class,
+ * which are debited together with the regular credits on success. */
+static inline int
+grinder_credits_check(struct rte_sched_port *port, uint32_t pos)
+{
+       struct rte_sched_grinder *grinder = port->grinder + pos;
+       struct rte_sched_subport *subport = grinder->subport;
+       struct rte_sched_pipe *pipe = grinder->pipe;
+       struct rte_mbuf *pkt = grinder->pkt;
+       uint32_t tc_index = grinder->tc_index;
+       uint32_t pkt_len = pkt->pkt.pkt_len + port->frame_overhead;
+       uint32_t subport_tb_credits = subport->tb_credits;
+       uint32_t subport_tc_credits = subport->tc_credits[tc_index];
+       uint32_t pipe_tb_credits = pipe->tb_credits;
+       uint32_t pipe_tc_credits = pipe->tc_credits[tc_index];
+       uint32_t pipe_tc_ov_credits = pipe->tc_ov_credits[tc_index];
+       int enough_credits;
+       
+       /* Check pipe and subport credits, including the pipe's
+        * oversubscription allowance for this traffic class */
+       enough_credits = (pkt_len <= subport_tb_credits) &&
+               (pkt_len <= subport_tc_credits) &&
+               (pkt_len <= pipe_tb_credits) &&
+               (pkt_len <= pipe_tc_credits) &&
+               (pkt_len <= pipe_tc_ov_credits);
+       
+       if (!enough_credits) {
+               return 0;
+       }
+       
+       /* Update pipe and subport credits */
+       subport->tb_credits -= pkt_len;
+       subport->tc_credits[tc_index] -= pkt_len;
+       pipe->tb_credits -= pkt_len;
+       pipe->tc_credits[tc_index] -= pkt_len;
+       pipe->tc_ov_credits[tc_index] -= pkt_len;
+       
+       return 1;
+}
+
+#endif /* RTE_SCHED_SUBPORT_TC_OV */
+
+/* Try to schedule (transmit) the current packet of the grinder at
+ * position pos. Returns 1 if the packet was sent, 0 if it was blocked
+ * by the credit check (when RTE_SCHED_TS_CREDITS_CHECK is enabled).
+ *
+ * On success: port time advances by the framed packet length, the
+ * packet is appended to the current dequeue burst (port->pkts_out),
+ * and the active queue's read pointer and WRR token count are updated.
+ * When the queue drains (qr == qw), its bit is cleared in the port
+ * bitmap, in the grinder's qmask and in the WRR mask, and the queue
+ * empty timestamp is recorded. Any successful send resets the pipe
+ * loop detector and marks the grinder productive. */
+static inline int 
+grinder_schedule(struct rte_sched_port *port, uint32_t pos)
+{
+       struct rte_sched_grinder *grinder = port->grinder + pos;
+       struct rte_sched_queue *queue = grinder->queue[grinder->qpos];
+       struct rte_mbuf *pkt = grinder->pkt;
+       uint32_t pkt_len = pkt->pkt.pkt_len + port->frame_overhead;
+
+#if RTE_SCHED_TS_CREDITS_CHECK
+       if (!grinder_credits_check(port, pos)) {
+               return 0;
+       }
+#endif
+
+       /* Advance port time */
+       port->time += pkt_len;
+       
+       /* Send packet */
+       port->pkts_out[port->n_pkts_out ++] = pkt;
+       queue->qr ++;
+       grinder->wrr_tokens[grinder->qpos] += pkt_len * grinder->wrr_cost[grinder->qpos];
+       if (queue->qr == queue->qw) {
+               uint32_t qindex = grinder->qindex[grinder->qpos];
+
+               rte_bitmap_clear(&port->bmp, qindex);
+               grinder->qmask &= ~(1 << grinder->qpos);
+               grinder->wrr_mask[grinder->qpos] = 0;
+               rte_sched_port_set_queue_empty_timestamp(port, qindex);
+       }
+       
+       /* Reset pipe loop detection */
+       port->pipe_loop = RTE_SCHED_PIPE_INVALID;
+       grinder->productive = 1;
+       
+       return 1;
+}
+
+#if RTE_SCHED_OPTIMIZATIONS
+
+/* Vectorized check whether the pipe group starting at bitmap position
+ * base_pipe is already installed in any grinder. Loads 8 entries of
+ * port->grinder_base_bmp_pos as two 128-bit vectors and compares all
+ * lanes against base_pipe in parallel; returns 1 on any match, 0
+ * otherwise.
+ * NOTE(review): the two 4-lane aligned loads cover exactly 8 entries --
+ * assumes RTE_SCHED_PORT_N_GRINDERS == 8 and 16-byte alignment of
+ * grinder_base_bmp_pos; confirm against the port structure layout.
+ * _mm_testz_si128 requires SSE4.1. */
+static inline int
+grinder_pipe_exists(struct rte_sched_port *port, uint32_t base_pipe)
+{
+       __m128i index = _mm_set1_epi32 (base_pipe);
+       __m128i pipes = _mm_load_si128((__m128i *)port->grinder_base_bmp_pos);
+       __m128i res = _mm_cmpeq_epi32(pipes, index);
+       pipes = _mm_load_si128((__m128i *)(port->grinder_base_bmp_pos + 4));
+       pipes = _mm_cmpeq_epi32(pipes, index);
+       res = _mm_or_si128(res, pipes);
+
+       if (_mm_testz_si128(res, res))
+               return 0;
+
+       return 1;
+}
+
+#else
+
+/* Scalar fallback: return 1 when the pipe group starting at bitmap
+ * position base_pipe is already owned by one of the grinders,
+ * 0 otherwise. */
+static inline int
+grinder_pipe_exists(struct rte_sched_port *port, uint32_t base_pipe)
+{
+       uint32_t i = 0;
+       
+       while (i < RTE_SCHED_PORT_N_GRINDERS) {
+               if (port->grinder_base_bmp_pos[i] == base_pipe)
+                       return 1;
+               i ++;
+       }
+       
+       return 0;
+}
+
+#endif /* RTE_SCHED_OPTIMIZATIONS */
+
+/* Refill the grinder's pipe cache from a 64-bit bitmap slab located at
+ * bitmap position bmp_pos. The slab is split into four 16-bit words
+ * (one word per 16-queue pipe); each word is stored together with the
+ * queue index of its first queue. The write pointer advances
+ * branchlessly by (w[i] != 0), so an all-zero word is simply
+ * overwritten by the next candidate and only non-empty pipes remain
+ * in the cache. */
+static inline void
+grinder_pcache_populate(struct rte_sched_port *port, uint32_t pos, uint32_t bmp_pos, uint64_t bmp_slab)
+{
+       struct rte_sched_grinder *grinder = port->grinder + pos;
+       uint16_t w[4];
+
+       grinder->pcache_w = 0;
+       grinder->pcache_r = 0;
+       
+       w[0] = (uint16_t) bmp_slab;
+       w[1] = (uint16_t) (bmp_slab >> 16);
+       w[2] = (uint16_t) (bmp_slab >> 32);
+       w[3] = (uint16_t) (bmp_slab >> 48);
+       
+       grinder->pcache_qmask[grinder->pcache_w] = w[0];
+       grinder->pcache_qindex[grinder->pcache_w] = bmp_pos;
+       grinder->pcache_w += (w[0] != 0);
+       
+       grinder->pcache_qmask[grinder->pcache_w] = w[1];
+       grinder->pcache_qindex[grinder->pcache_w] = bmp_pos + 16;
+       grinder->pcache_w += (w[1] != 0);
+       
+       grinder->pcache_qmask[grinder->pcache_w] = w[2];
+       grinder->pcache_qindex[grinder->pcache_w] = bmp_pos + 32;
+       grinder->pcache_w += (w[2] != 0);
+       
+       grinder->pcache_qmask[grinder->pcache_w] = w[3];
+       grinder->pcache_qindex[grinder->pcache_w] = bmp_pos + 48;
+       grinder->pcache_w += (w[3] != 0);
+}
+
+/* Refill the grinder's traffic class cache from the 16-bit queue mask
+ * of a pipe whose first queue index is qindex. The mask is split into
+ * four 4-bit nibbles (one per traffic class of 4 queues); each nibble
+ * is stored together with the queue index of its first queue. As in
+ * grinder_pcache_populate, the write pointer advances branchlessly by
+ * (b[i] != 0) so only non-empty traffic classes are retained. */
+static inline void
+grinder_tccache_populate(struct rte_sched_port *port, uint32_t pos, uint32_t qindex, uint16_t qmask)
+{
+       struct rte_sched_grinder *grinder = port->grinder + pos;
+       uint8_t b[4];
+       
+       grinder->tccache_w = 0;
+       grinder->tccache_r = 0;
+       
+       b[0] = (uint8_t) (qmask & 0xF);
+       b[1] = (uint8_t) ((qmask >> 4) & 0xF);
+       b[2] = (uint8_t) ((qmask >> 8) & 0xF);
+       b[3] = (uint8_t) ((qmask >> 12) & 0xF);
+       
+       grinder->tccache_qmask[grinder->tccache_w] = b[0];
+       grinder->tccache_qindex[grinder->tccache_w] = qindex;
+       grinder->tccache_w += (b[0] != 0);
+       
+       grinder->tccache_qmask[grinder->tccache_w] = b[1];
+       grinder->tccache_qindex[grinder->tccache_w] = qindex + 4;
+       grinder->tccache_w += (b[1] != 0);
+       
+       grinder->tccache_qmask[grinder->tccache_w] = b[2];
+       grinder->tccache_qindex[grinder->tccache_w] = qindex + 8;
+       grinder->tccache_w += (b[2] != 0);
+       
+       grinder->tccache_qmask[grinder->tccache_w] = b[3];
+       grinder->tccache_qindex[grinder->tccache_w] = qindex + 12;
+       grinder->tccache_w += (b[3] != 0);
+}
+
+/* Advance the grinder to the next active traffic class of the current
+ * pipe, as recorded in the TC cache. Returns 0 when the cache is
+ * exhausted, 1 otherwise.
+ *
+ * Installs the TC context: tc_index is decoded from bits 2..3 of the
+ * TC's base queue index, and structure pointers plus queue array bases
+ * are set up for all 4 queues of the TC. The array bases are spaced
+ * qsize apart, i.e. the 4 queues of one TC share the same size. */
+static inline int
+grinder_next_tc(struct rte_sched_port *port, uint32_t pos)
+{
+       struct rte_sched_grinder *grinder = port->grinder + pos;
+       struct rte_mbuf **qbase;
+       uint32_t qindex; 
+       uint16_t qsize; 
+
+       if (grinder->tccache_r == grinder->tccache_w) {
+               return 0;
+       }
+
+       qindex = grinder->tccache_qindex[grinder->tccache_r];
+       qbase = rte_sched_port_qbase(port, qindex);
+       qsize = rte_sched_port_qsize(port, qindex);
+
+       grinder->tc_index = (qindex >> 2) & 0x3;
+       grinder->qmask = grinder->tccache_qmask[grinder->tccache_r];
+       grinder->qsize = qsize;
+       
+       grinder->qindex[0] = qindex;
+       grinder->qindex[1] = qindex + 1;
+       grinder->qindex[2] = qindex + 2;
+       grinder->qindex[3] = qindex + 3;
+
+       grinder->queue[0] = port->queue + qindex;
+       grinder->queue[1] = port->queue + qindex + 1;
+       grinder->queue[2] = port->queue + qindex + 2;
+       grinder->queue[3] = port->queue + qindex + 3;
+
+       grinder->qbase[0] = qbase;
+       grinder->qbase[1] = qbase + qsize;
+       grinder->qbase[2] = qbase + 2 * qsize;
+       grinder->qbase[3] = qbase + 3 * qsize;
+       
+       grinder->tccache_r ++;
+       return 1;
+}
+
+/* Install the next active pipe into the grinder at position pos.
+ * Returns 1 when a pipe was installed, 0 when no work is available
+ * for this grinder right now.
+ *
+ * Fast path: consume the next entry of the grinder's pipe cache.
+ * Slow path: scan the port bitmap for another non-empty pipe group;
+ * give up if that group is already serviced by another grinder (this
+ * grinder's own grinder_base_bmp_pos entry is invalidated first so
+ * the check cannot match itself), then repopulate the pipe cache from
+ * the new bitmap slab. */
+static inline int
+grinder_next_pipe(struct rte_sched_port *port, uint32_t pos)
+{
+       struct rte_sched_grinder *grinder = port->grinder + pos;
+       uint32_t pipe_qindex;
+       uint16_t pipe_qmask;
+
+       if (grinder->pcache_r < grinder->pcache_w) {
+               pipe_qmask = grinder->pcache_qmask[grinder->pcache_r];
+               pipe_qindex = grinder->pcache_qindex[grinder->pcache_r];
+               grinder->pcache_r ++;
+       } else {
+               uint64_t bmp_slab = 0;
+               uint32_t bmp_pos = 0;
+               
+               /* Get another non-empty pipe group */          
+               if (unlikely(rte_bitmap_scan(&port->bmp, &bmp_pos, &bmp_slab) <= 0)) {
+                       return 0;
+               }
+               
+#if RTE_SCHED_DEBUG
+               debug_check_queue_slab(port, bmp_pos, bmp_slab);
+#endif 
+
+               /* Return if pipe group already in one of the other grinders */
+               port->grinder_base_bmp_pos[pos] = RTE_SCHED_BMP_POS_INVALID;
+               if (unlikely(grinder_pipe_exists(port, bmp_pos))) {
+                       return 0;
+               }
+               port->grinder_base_bmp_pos[pos] = bmp_pos;
+               
+               /* Install new pipe group into grinder's pipe cache */
+               grinder_pcache_populate(port, pos, bmp_pos, bmp_slab);
+
+               pipe_qmask = grinder->pcache_qmask[0];
+               pipe_qindex = grinder->pcache_qindex[0];
+               grinder->pcache_r = 1;
+       }
+       
+       /* Install new pipe in the grinder (16 queues per pipe, so the
+        * pipe index is the queue index divided by 16) */
+       grinder->pindex = pipe_qindex >> 4;
+       grinder->subport = port->subport + (grinder->pindex / port->n_pipes_per_subport);
+       grinder->pipe = port->pipe + grinder->pindex;
+       grinder->pipe_params = NULL; /* to be set after the pipe structure is prefetched */
+       grinder->productive = 0;
+
+       grinder_tccache_populate(port, pos, pipe_qindex, pipe_qmask);
+       grinder_next_tc(port, pos);
+       
+       /* Check for pipe exhaustion: selecting the pipe remembered by the
+        * loop detector means a full unproductive pass over the active
+        * pipes has completed */
+       if (grinder->pindex == port->pipe_loop) {
+               port->pipe_exhaustion = 1;
+               port->pipe_loop = RTE_SCHED_PIPE_INVALID;
+       }
+       
+       return 1;       
+}
+
+#if RTE_SCHED_WRR == 0
+
+#define grinder_wrr_load(a,b)
+
+#define grinder_wrr_store(a,b)
+
+/* Queue selection when WRR is compiled out (RTE_SCHED_WRR == 0):
+ * always pick the lowest-numbered active queue of the current TC,
+ * i.e. strict priority by queue index. Panics if called with an
+ * empty qmask. */
+static inline void
+grinder_wrr(struct rte_sched_port *port, uint32_t pos)
+{
+       struct rte_sched_grinder *grinder = port->grinder + pos;
+       uint64_t slab = grinder->qmask;
+       
+       if (rte_bsf64(slab, &grinder->qpos) == 0) {
+               rte_panic("grinder wrr\n");
+       }
+}
+
+#elif RTE_SCHED_WRR == 1
+
+/* Load the WRR context of the current pipe TC into the grinder:
+ * per-queue token counters (upscaled by RTE_SCHED_WRR_SHIFT), activity
+ * masks (0xFFFF for each queue set in qmask, 0 otherwise) and the
+ * per-queue WRR costs from the pipe profile. qindex is the offset of
+ * the TC's first queue within the pipe (4 queues per TC). */
+static inline void
+grinder_wrr_load(struct rte_sched_port *port, uint32_t pos)
+{
+       struct rte_sched_grinder *grinder = port->grinder + pos;
+       struct rte_sched_pipe *pipe = grinder->pipe;
+       struct rte_sched_pipe_profile *pipe_params = grinder->pipe_params;
+       uint32_t tc_index = grinder->tc_index;
+       uint32_t qmask = grinder->qmask;
+       uint32_t qindex;
+       
+       qindex = tc_index * 4;
+       
+       grinder->wrr_tokens[0] = ((uint16_t) pipe->wrr_tokens[qindex]) << RTE_SCHED_WRR_SHIFT;
+       grinder->wrr_tokens[1] = ((uint16_t) pipe->wrr_tokens[qindex + 1]) << RTE_SCHED_WRR_SHIFT;
+       grinder->wrr_tokens[2] = ((uint16_t) pipe->wrr_tokens[qindex + 2]) << RTE_SCHED_WRR_SHIFT;
+       grinder->wrr_tokens[3] = ((uint16_t) pipe->wrr_tokens[qindex + 3]) << RTE_SCHED_WRR_SHIFT;
+       
+       grinder->wrr_mask[0] = (qmask & 0x1) * 0xFFFF;
+       grinder->wrr_mask[1] = ((qmask >> 1) & 0x1) * 0xFFFF;
+       grinder->wrr_mask[2] = ((qmask >> 2) & 0x1) * 0xFFFF;
+       grinder->wrr_mask[3] = ((qmask >> 3) & 0x1) * 0xFFFF;
+       
+       grinder->wrr_cost[0] = pipe_params->wrr_cost[qindex];
+       grinder->wrr_cost[1] = pipe_params->wrr_cost[qindex + 1];
+       grinder->wrr_cost[2] = pipe_params->wrr_cost[qindex + 2];
+       grinder->wrr_cost[3] = pipe_params->wrr_cost[qindex + 3];
+}
+
+/* Write the grinder's WRR token counters back into the pipe run-time
+ * state, downscaled by RTE_SCHED_WRR_SHIFT. Masking with wrr_mask
+ * forces the stored value of queues that are inactive (or have drained
+ * during this service round) to zero. */
+static inline void
+grinder_wrr_store(struct rte_sched_port *port, uint32_t pos)
+{
+       struct rte_sched_grinder *grinder = port->grinder + pos;
+       struct rte_sched_pipe *pipe = grinder->pipe;
+       uint32_t tc_index = grinder->tc_index;
+       uint32_t qindex;
+       
+       qindex = tc_index * 4;
+       
+       pipe->wrr_tokens[qindex] = (uint8_t) ((grinder->wrr_tokens[0] & grinder->wrr_mask[0]) >> RTE_SCHED_WRR_SHIFT);
+       pipe->wrr_tokens[qindex + 1] = (uint8_t) ((grinder->wrr_tokens[1] & grinder->wrr_mask[1]) >> RTE_SCHED_WRR_SHIFT);
+       pipe->wrr_tokens[qindex + 2] = (uint8_t) ((grinder->wrr_tokens[2] & grinder->wrr_mask[2]) >> RTE_SCHED_WRR_SHIFT);
+       pipe->wrr_tokens[qindex + 3] = (uint8_t) ((grinder->wrr_tokens[3] & grinder->wrr_mask[3]) >> RTE_SCHED_WRR_SHIFT);
+}
+
+/* WRR queue selection. Inactive queues have their token counters
+ * saturated to 0xFFFF (OR with the inverted activity mask) so they can
+ * never be the minimum; the active queue with the fewest accumulated
+ * tokens is selected into qpos, and that minimum is subtracted from
+ * all four counters to keep them bounded. */
+static inline void
+grinder_wrr(struct rte_sched_port *port, uint32_t pos)
+{
+       struct rte_sched_grinder *grinder = port->grinder + pos;
+       uint16_t wrr_tokens_min;
+
+       grinder->wrr_tokens[0] |= ~grinder->wrr_mask[0];
+       grinder->wrr_tokens[1] |= ~grinder->wrr_mask[1];
+       grinder->wrr_tokens[2] |= ~grinder->wrr_mask[2];
+       grinder->wrr_tokens[3] |= ~grinder->wrr_mask[3];
+       
+       grinder->qpos = rte_min_pos_4_u16(grinder->wrr_tokens);
+       wrr_tokens_min = grinder->wrr_tokens[grinder->qpos];
+       
+       grinder->wrr_tokens[0] -= wrr_tokens_min;
+       grinder->wrr_tokens[1] -= wrr_tokens_min;
+       grinder->wrr_tokens[2] -= wrr_tokens_min;
+       grinder->wrr_tokens[3] -= wrr_tokens_min;
+}
+
+#else
+
+#error Invalid value for RTE_SCHED_WRR
+
+#endif /* RTE_SCHED_WRR */
+
+#define grinder_evict(port, pos)
+
+/* Issue prefetches for the newly installed pipe's run-time state and
+ * the first queue structure of its current TC, ahead of the next
+ * grinder state that reads them. */
+static inline void
+grinder_prefetch_pipe(struct rte_sched_port *port, uint32_t pos)
+{
+       struct rte_sched_grinder *grinder = port->grinder + pos;
+       
+       rte_prefetch0(grinder->pipe);
+       rte_prefetch0(grinder->queue[0]);
+}
+
+/* Prefetch the queue array read positions of the 4 queues of the
+ * current TC. Queue positions are wrapped by masking with (qsize - 1),
+ * so qsize is relied upon to be a power of 2. The WRR load/selection
+ * is interleaved between the two prefetch pairs, presumably to overlap
+ * computation with the outstanding memory accesses -- keep this
+ * statement ordering. */
+static inline void
+grinder_prefetch_tc_queue_arrays(struct rte_sched_port *port, uint32_t pos)
+{
+       struct rte_sched_grinder *grinder = port->grinder + pos;
+       uint16_t qsize, qr[4];
+       
+       qsize = grinder->qsize;
+       qr[0] = grinder->queue[0]->qr & (qsize - 1);
+       qr[1] = grinder->queue[1]->qr & (qsize - 1);
+       qr[2] = grinder->queue[2]->qr & (qsize - 1);
+       qr[3] = grinder->queue[3]->qr & (qsize - 1);
+       
+       rte_prefetch0(grinder->qbase[0] + qr[0]);
+       rte_prefetch0(grinder->qbase[1] + qr[1]);
+
+       grinder_wrr_load(port, pos);
+       grinder_wrr(port, pos);
+       
+       rte_prefetch0(grinder->qbase[2] + qr[2]);
+       rte_prefetch0(grinder->qbase[3] + qr[3]);       
+}
+
+/* Read the head packet of the WRR-selected queue into grinder->pkt and
+ * prefetch its mbuf. When the read position is at the last slot of an
+ * 8-entry group (qr & 0x7 == 7), also prefetch the queue array slot
+ * about to be entered -- presumably the next cache line of mbuf
+ * pointers; TODO confirm against the queue array layout. */
+static inline void
+grinder_prefetch_mbuf(struct rte_sched_port *port, uint32_t pos)
+{
+       struct rte_sched_grinder *grinder = port->grinder + pos;
+       uint32_t qpos = grinder->qpos;
+       struct rte_mbuf **qbase = grinder->qbase[qpos];
+       uint16_t qsize = grinder->qsize;
+       uint16_t qr = grinder->queue[qpos]->qr & (qsize - 1);
+       
+       grinder->pkt = qbase[qr];
+       rte_prefetch0(grinder->pkt);
+       
+       if (unlikely((qr & 0x7) == 7)) {
+               uint16_t qr_next = (grinder->queue[qpos]->qr + 1) & (qsize - 1);
+               
+               rte_prefetch0(qbase + qr_next);
+       }
+}
+
+/* Run one step of the grinder state machine at position pos and return
+ * the number of packets scheduled during this step (0 or 1).
+ *
+ * Pipeline: PREFETCH_PIPE -> PREFETCH_TC_QUEUE_ARRAYS -> PREFETCH_MBUF
+ * -> READ_MBUF. Each prefetch state issues memory prefetches for the
+ * data the following state consumes; the READ_MBUF state does the
+ * actual scheduling and decides where to loop back to (same TC, next
+ * TC, next pipe, or idle). */
+static inline uint32_t
+grinder_handle(struct rte_sched_port *port, uint32_t pos)
+{
+       struct rte_sched_grinder *grinder = port->grinder + pos;
+       
+       switch (grinder->state) {
+       case e_GRINDER_PREFETCH_PIPE:
+       {
+               if (grinder_next_pipe(port, pos)) {
+                       grinder_prefetch_pipe(port, pos);
+                       port->busy_grinders ++;
+                       
+                       grinder->state = e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS;
+                       return 0;
+               }
+               
+               return 0;
+       }
+
+       case e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS:
+       {
+               struct rte_sched_pipe *pipe = grinder->pipe;
+               
+               /* The pipe structure was prefetched by the previous state,
+                * so its profile index can now be read cheaply */
+               grinder->pipe_params = port->pipe_profiles + pipe->profile;
+               grinder_prefetch_tc_queue_arrays(port, pos);
+               grinder_credits_update(port, pos);
+               
+               grinder->state = e_GRINDER_PREFETCH_MBUF;
+               return 0;
+       }
+       
+       case e_GRINDER_PREFETCH_MBUF:
+       {
+               grinder_prefetch_mbuf(port, pos);
+               
+               grinder->state = e_GRINDER_READ_MBUF;
+               return 0;
+       }
+       
+       case e_GRINDER_READ_MBUF:
+       {
+               uint32_t result = 0;
+               
+               result = grinder_schedule(port, pos);
+               
+               /* Look for next packet within the same TC */
+               if (result && grinder->qmask) {
+                       grinder_wrr(port, pos);
+                       grinder_prefetch_mbuf(port, pos);
+                       
+                       return 1;
+               }
+               grinder_wrr_store(port, pos);
+               
+               /* Look for another active TC within same pipe */
+               if (grinder_next_tc(port, pos)) {
+                       grinder_prefetch_tc_queue_arrays(port, pos);
+                       
+                       grinder->state = e_GRINDER_PREFETCH_MBUF;
+                       return result;
+               }               
+               /* Pipe fully scanned without sending anything: remember it
+                * so pipe exhaustion can be detected on the next visit */
+               if ((grinder->productive == 0) && (port->pipe_loop == RTE_SCHED_PIPE_INVALID)) {
+                       port->pipe_loop = grinder->pindex;
+               }
+               grinder_evict(port, pos);
+               
+               /* Look for another active pipe */
+               if (grinder_next_pipe(port, pos)) {
+                       grinder_prefetch_pipe(port, pos);
+                       
+                       grinder->state = e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS;
+                       return result;
+               }
+               
+               /* No active pipe found */
+               port->busy_grinders --;
+               
+               grinder->state = e_GRINDER_PREFETCH_PIPE;
+               return result;
+       }
+       
+       default:
+               rte_panic("Algorithmic error (invalid state)\n");
+               return 0;
+       }
+}
+
+/* Re-synchronize the port's internal byte clock with the CPU TSC.
+ * Elapsed cycles since the last resync are converted to byte times
+ * through port->cycles_per_byte; port->time only moves forward and is
+ * raised to the wall-clock byte count (time_cpu_bytes) only when it
+ * has fallen behind it. Also re-arms the pipe loop detector for the
+ * upcoming dequeue run. */
+static inline void 
+rte_sched_port_time_resync(struct rte_sched_port *port)
+{
+       uint64_t cycles = rte_get_tsc_cycles();
+       uint64_t cycles_diff = cycles - port->time_cpu_cycles;
+       double bytes_diff = ((double) cycles_diff) / port->cycles_per_byte;
+       
+       /* Advance port time */
+       port->time_cpu_cycles = cycles;
+       port->time_cpu_bytes += (uint64_t) bytes_diff;
+       if (port->time < port->time_cpu_bytes) {
+               port->time = port->time_cpu_bytes;
+       }
+
+       /* Reset pipe loop detection */
+       port->pipe_loop = RTE_SCHED_PIPE_INVALID;
+}
+
+/* Return non-zero when the current dequeue run should stop: either no
+ * grinder is busy any more, or a full unproductive pass over the
+ * active pipes was detected (pipe exhaustion). The pipe exhaustion
+ * flag is acknowledged (cleared) on every call. */
+static inline int
+rte_sched_port_exceptions(struct rte_sched_port *port)
+{
+       int stop;
+
+       stop = (port->busy_grinders == 0) ||
+               (port->pipe_exhaustion == 1);
+       
+       /* Acknowledge the exception */
+       port->pipe_exhaustion = 0;
+       
+       return stop;
+}
+
+/* Dequeue a burst of up to n_pkts packets from the port scheduler into
+ * the pkts array. Returns the number of packets actually written.
+ *
+ * After re-syncing the port byte clock, the grinders are serviced in
+ * round-robin order (the index wraps with RTE_SCHED_PORT_N_GRINDERS,
+ * which is relied upon to be a power of 2), one state-machine step
+ * each, until either the burst is complete or an exception (no busy
+ * grinders, or pipe exhaustion) shows no further progress is possible. */
+int
+rte_sched_port_dequeue(struct rte_sched_port *port, struct rte_mbuf **pkts, uint32_t n_pkts)
+{
+       uint32_t i, count;
+       
+       port->pkts_out = pkts;
+       port->n_pkts_out = 0;
+       
+       rte_sched_port_time_resync(port);
+       
+       /* Take each queue in the grinder one step further */
+       for (i = 0, count = 0; ; i ++)  {
+               count += grinder_handle(port, i & (RTE_SCHED_PORT_N_GRINDERS - 1));
+               if ((count == n_pkts) || rte_sched_port_exceptions(port)) {
+                       break;
+               }
+       }
+       
+       return count;
+}
diff --git a/lib/librte_sched/rte_sched.h b/lib/librte_sched/rte_sched.h
new file mode 100644 (file)
index 0000000..7b49248
--- /dev/null
@@ -0,0 +1,446 @@
+/*-
+ *   BSD LICENSE
+ * 
+ *   Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ * 
+ *   Redistribution and use in source and binary forms, with or without 
+ *   modification, are permitted provided that the following conditions 
+ *   are met:
+ * 
+ *     * Redistributions of source code must retain the above copyright 
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright 
+ *       notice, this list of conditions and the following disclaimer in 
+ *       the documentation and/or other materials provided with the 
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its 
+ *       contributors may be used to endorse or promote products derived 
+ *       from this software without specific prior written permission.
+ * 
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * 
+ */
+
+#ifndef __INCLUDE_RTE_SCHED_H__
+#define __INCLUDE_RTE_SCHED_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Hierarchical Scheduler
+ *
+ * The hierarchical scheduler prioritizes the transmission of packets from different
+ * users and traffic classes according to the Service Level Agreements (SLAs) defined
+ * for the current network node.
+ *
+ * The scheduler supports thousands of packet queues grouped under a 5-level hierarchy:
+ *     1. Port: 
+ *           - Typical usage: output Ethernet port;
+ *           - Multiple ports are scheduled in round robin order with equal priority;
+ *     2. Subport:
+ *           - Typical usage: group of users;
+ *           - Traffic shaping using the token bucket algorithm (one bucket per subport);
+ *           - Upper limit enforced per traffic class at subport level;
+ *           - Lower priority traffic classes able to reuse subport bandwidth currently
+ *             unused by higher priority traffic classes of the same subport;
+ *           - When any subport traffic class is oversubscribed (configuration time 
+ *             event), the usage of subport member pipes with high demand for that 
+ *             traffic class is truncated to a dynamically adjusted value with no 
+ *             impact to low demand pipes;
+ *     3. Pipe: 
+ *           - Typical usage: individual user/subscriber;
+ *           - Traffic shaping using the token bucket algorithm (one bucket per pipe);
+ *     4. Traffic class:
+ *           - Traffic classes of the same pipe handled in strict priority order;
+ *           - Upper limit enforced per traffic class at the pipe level;
+ *           - Lower priority traffic classes able to reuse pipe bandwidth currently
+ *             unused by higher priority traffic classes of the same pipe;
+ *     5. Queue:
+ *           - Typical usage: queue hosting packets from one or multiple connections 
+ *             of same traffic class belonging to the same user;
+ *           - Weighted Round Robin (WRR) is used to service the queues within same 
+ *             pipe traffic class.
+ *
+ ***/
+
+#include <sys/types.h>
+#include <rte_mbuf.h>
+#include <rte_meter.h>
+
+/** Random Early Detection (RED) */
+#ifdef RTE_SCHED_RED
+#include "rte_red.h"
+#endif
+
+/** Number of traffic classes per pipe (as well as subport). Cannot be changed. */
+#define RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE    4
+
+/** Number of queues per pipe traffic class. Cannot be changed. */
+#define RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS    4
+
+/** Number of queues per pipe. */
+#define RTE_SCHED_QUEUES_PER_PIPE             \
+       (RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE *     \
+       RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS)
+
+/** Maximum number of pipe profiles that can be defined per port. Compile-time configurable.*/
+#ifndef RTE_SCHED_PIPE_PROFILES_PER_PORT
+#define RTE_SCHED_PIPE_PROFILES_PER_PORT      256
+#endif
+
+/** Ethernet framing overhead. Overhead fields per Ethernet frame:
+   1. Preamble:                             7 bytes;
+   2. Start of Frame Delimiter (SFD):       1 byte;
+   3. Frame Check Sequence (FCS):           4 bytes;
+   4. Inter Frame Gap (IFG):               12 bytes.
+The FCS is considered overhead only if not included in the packet length (field pkt.pkt_len
+of struct rte_mbuf). */
+#ifndef RTE_SCHED_FRAME_OVERHEAD_DEFAULT
+#define RTE_SCHED_FRAME_OVERHEAD_DEFAULT      24
+#endif
+
+/** Subport configuration parameters. The period and credits_per_period parameters are measured
+in bytes, with one byte meaning the time duration associated with the transmission of one byte 
+on the physical medium of the output port, with subport or subport traffic class rate (measured
+as percentage of output port rate) determined as credits_per_period divided by period. One
+credit represents one byte. */
+struct rte_sched_subport_params {
+       /* Subport token bucket */
+       uint32_t tb_rate;                /**< Subport token bucket rate (measured in bytes per second) */
+       uint32_t tb_size;                /**< Subport token bucket size (measured in credits) */
+       
+       /* Subport traffic classes */
+       uint32_t tc_rate[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; /**< Subport traffic class rates (measured in bytes per second) */
+       uint32_t tc_period;              /**< Enforcement period for traffic class rates (measured in milliseconds) */
+#ifdef RTE_SCHED_SUBPORT_TC_OV
+       uint32_t tc_ov_period;           /**< Enforcement period for traffic class oversubscription (measured in milliseconds). Field only present when RTE_SCHED_SUBPORT_TC_OV is defined, which changes the size of this structure */
+#endif
+};
+
+/** Subport statistics. All counters are 32-bit and wrap around on overflow. */
+struct rte_sched_subport_stats {
+       /* Packets */
+       uint32_t n_pkts_tc[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; /**< Number of packets successfully written to current
+                                             subport for each traffic class */
+       uint32_t n_pkts_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; /**< Number of packets dropped by the current
+                                             subport for each traffic class due to subport queues being full or congested*/
+       
+       /* Bytes */
+       uint32_t n_bytes_tc[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; /**< Number of bytes successfully written to current 
+                                             subport for each traffic class*/
+       uint32_t n_bytes_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; /**< Number of bytes dropped by the current 
+                                          subport for each traffic class due to subport queues being full or congested */
+};
+
+/** Pipe configuration parameters. The period and credits_per_period parameters are measured
+in bytes, with one byte meaning the time duration associated with the transmission of one byte 
+on the physical medium of the output port, with pipe or pipe traffic class rate (measured as 
+percentage of output port rate) determined as credits_per_period divided by period. One credit
+represents one byte. */
+struct rte_sched_pipe_params {
+       /* Pipe token bucket */
+       uint32_t tb_rate;                /**< Pipe token bucket rate (measured in bytes per second) */
+       uint32_t tb_size;                /**< Pipe token bucket size (measured in credits) */
+       
+       /* Pipe traffic classes */
+       uint32_t tc_rate[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; /**< Pipe traffic class rates (measured in bytes per second) */
+       uint32_t tc_period;              /**< Enforcement period for pipe traffic class rates (measured in milliseconds) */
+#ifdef RTE_SCHED_SUBPORT_TC_OV
+       uint8_t tc_ov_weight[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; /**< Traffic class weights to be used for the 
+                                             current pipe in the event of subport traffic class oversubscription. Field only
+                                             present when RTE_SCHED_SUBPORT_TC_OV is defined */
+#endif
+       
+       /* Pipe queues */
+       uint8_t  wrr_weights[RTE_SCHED_QUEUES_PER_PIPE]; /**< WRR weights for the 16 queues of the current pipe
+                                             (4 traffic classes x 4 queues) */
+};
+
+/** Queue statistics. All counters are 32-bit and wrap around on overflow. */
+struct rte_sched_queue_stats {
+       /* Packets */
+       uint32_t n_pkts;                 /**< Number of packets successfully written to current queue */
+       uint32_t n_pkts_dropped;         /**< Number of packets dropped due to current queue being full or congested */
+       
+       /* Bytes */
+       uint32_t n_bytes;                /**< Number of bytes successfully written to current queue */
+       uint32_t n_bytes_dropped;        /**< Number of bytes dropped due to current queue being full or congested */   
+};
+
+/** Port configuration parameters. */
+struct rte_sched_port_params {
+       const char *name;                /**< Literal string to be associated to the current port scheduler instance */
+       int socket;                      /**< CPU socket ID where the memory for port scheduler should be allocated */
+       uint32_t rate;                   /**< Output port rate (measured in bytes per second) */
+       uint32_t frame_overhead;         /**< Framing overhead per packet (measured in bytes) */
+       uint32_t n_subports_per_port;    /**< Number of subports for the current port scheduler instance*/
+       uint32_t n_pipes_per_subport;    /**< Number of pipes for each port scheduler subport */
+       uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; /**< Packet queue size for each traffic class. All queues 
+                                             within the same pipe traffic class have the same size. Queues from 
+                                             different pipes serving the same traffic class have the same size.
+                                             The dequeue path wraps queue positions by masking with (qsize - 1),
+                                             so each size should be a power of 2 */
+       struct rte_sched_pipe_params *pipe_profiles; /**< Pipe profile table defined for current port scheduler instance.
+                                          Every pipe of the current port scheduler is configured using one of the
+                                                                                 profiles from this table. */
+       uint32_t n_pipe_profiles;        /**< Number of profiles in the pipe profile table */
+#ifdef RTE_SCHED_RED
+       struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][e_RTE_METER_COLORS]; /**< RED parameters */
+#endif
+};
+
+/** Path through the scheduler hierarchy used by the scheduler enqueue operation to
+identify the destination queue for the current packet. Stored in the field pkt.hash.sched
+of struct rte_mbuf of each packet, typically written by the classification stage and read by
+scheduler enqueue. The bit-fields below total exactly 32 bits (2+2+20+6+2). */
+struct rte_sched_port_hierarchy {
+       uint32_t queue:2;                /**< Queue ID (0 .. 3) */
+       uint32_t traffic_class:2;        /**< Traffic class ID (0 .. 3) */
+       uint32_t pipe:20;                /**< Pipe ID */
+       uint32_t subport:6;              /**< Subport ID */
+       uint32_t color:2;                /**< Packet color (enum rte_meter_color) */
+};
+
+/*
+ * Configuration
+ *
+ ***/
+
+/**
+ * Hierarchical scheduler port configuration
+ *
+ * @param params
+ *   Port scheduler configuration parameter structure
+ * @return
+ *   Handle to port scheduler instance upon success or NULL otherwise.
+ */
+struct rte_sched_port * 
+rte_sched_port_config(struct rte_sched_port_params *params);
+
+/**
+ * Hierarchical scheduler port free
+ *
+ * @param port
+ *   Handle to port scheduler instance
+ */
+void
+rte_sched_port_free(struct rte_sched_port *port);
+
+/**
+ * Hierarchical scheduler subport configuration
+ *
+ * @param port
+ *   Handle to port scheduler instance
+ * @param subport_id
+ *   Subport ID
+ * @param params
+ *   Subport configuration parameters
+ * @return
+ *   0 upon success, error code otherwise
+ */
+int
+rte_sched_subport_config(struct rte_sched_port *port, 
+       uint32_t subport_id,
+       struct rte_sched_subport_params *params);
+
+/**
+ * Hierarchical scheduler pipe configuration
+ *
+ * @param port
+ *   Handle to port scheduler instance
+ * @param subport_id
+ *   Subport ID
+ * @param pipe_id
+ *   Pipe ID within subport
+ * @param pipe_profile
+ *   ID of port-level pre-configured pipe profile
+ * @return
+ *   0 upon success, error code otherwise
+ */
+int
+rte_sched_pipe_config(struct rte_sched_port *port,
+       uint32_t subport_id, 
+       uint32_t pipe_id,
+       int32_t pipe_profile);
+
+/**
+ * Hierarchical scheduler memory footprint size per port
+ *
+ * @param params
+ *   Port scheduler configuration parameter structure
+ * @return
+ *   Memory footprint size in bytes upon success, 0 otherwise
+ */
+uint32_t
+rte_sched_port_get_memory_footprint(struct rte_sched_port_params *params);
+
+/*
+ * Statistics 
+ *
+ ***/
+
+/**
+ * Hierarchical scheduler subport statistics read
+ *
+ * @param port
+ *   Handle to port scheduler instance
+ * @param subport_id
+ *   Subport ID
+ * @param stats
+ *   Pointer to pre-allocated subport statistics structure where the statistics 
+ *   counters should be stored
+ * @param tc_ov
+ *   Pointer to pre-allocated 4-entry array where the oversubscription status for
+ *   each of the 4 subport traffic classes should be stored.
+ * @return
+ *   0 upon success, error code otherwise
+ */
+int
+rte_sched_subport_read_stats(struct rte_sched_port *port,
+       uint32_t subport_id,
+       struct rte_sched_subport_stats *stats,
+       uint32_t *tc_ov);
+
+/**
+ * Hierarchical scheduler queue statistics read
+ *
+ * @param port
+ *   Handle to port scheduler instance
+ * @param queue_id
+ *   Queue ID within port scheduler
+ * @param stats
+ *   Pointer to pre-allocated queue statistics structure where the statistics
+ *   counters should be stored
+ * @param qlen
+ *   Pointer to pre-allocated variable where the current queue length should be stored.
+ * @return
+ *   0 upon success, error code otherwise
+ */
+int
+rte_sched_queue_read_stats(struct rte_sched_port *port,
+       uint32_t queue_id,
+       struct rte_sched_queue_stats *stats,
+       uint16_t *qlen);
+
+/* 
+ * Run-time 
+ *
+ ***/
+
+/**
+ * Scheduler hierarchy path write to packet descriptor. Typically called by the 
+ * packet classification stage.
+ * 
+ * @param pkt
+ *   Packet descriptor handle
+ * @param subport
+ *   Subport ID
+ * @param pipe
+ *   Pipe ID within subport
+ * @param traffic_class
+ *   Traffic class ID within pipe (0 .. 3)
+ * @param queue
+ *   Queue ID within pipe traffic class (0 .. 3)
+ */
+static inline void
+rte_sched_port_pkt_write(struct rte_mbuf *pkt, 
+       uint32_t subport, uint32_t pipe, uint32_t traffic_class, uint32_t queue, enum rte_meter_color color)
+{
+       struct rte_sched_port_hierarchy *sched = (struct rte_sched_port_hierarchy *) &pkt->pkt.hash.sched;
+       
+       sched->color = (uint32_t) color;
+       sched->subport = subport;
+       sched->pipe = pipe;
+       sched->traffic_class = traffic_class;
+       sched->queue = queue;
+}
+
+/**
+ * Scheduler hierarchy path read from packet descriptor (struct rte_mbuf). Typically
+ * called as part of the hierarchical scheduler enqueue operation. The subport, 
+ * pipe, traffic class and queue parameters need to be pre-allocated by the caller.
+ *
+ * @param pkt
+ *   Packet descriptor handle
+ * @param subport
+ *   Subport ID
+ * @param pipe
+ *   Pipe ID within subport
+ * @param traffic_class
+ *   Traffic class ID within pipe (0 .. 3)
+ * @param queue
+ *   Queue ID within pipe traffic class (0 .. 3)
+ *   
+ */
+static inline void
+rte_sched_port_pkt_read_tree_path(struct rte_mbuf *pkt, uint32_t *subport, uint32_t *pipe, uint32_t *traffic_class, uint32_t *queue)
+{
+       struct rte_sched_port_hierarchy *sched = (struct rte_sched_port_hierarchy *) &pkt->pkt.hash.sched;
+       
+       *subport = sched->subport;
+       *pipe = sched->pipe;
+       *traffic_class = sched->traffic_class;
+       *queue = sched->queue;
+}
+
+static inline enum rte_meter_color
+rte_sched_port_pkt_read_color(struct rte_mbuf *pkt)
+{
+       struct rte_sched_port_hierarchy *sched = (struct rte_sched_port_hierarchy *) &pkt->pkt.hash.sched;
+
+       return (enum rte_meter_color) sched->color;
+}
+
+/**
+ * Hierarchical scheduler port enqueue. Writes up to n_pkts to port scheduler and 
+ * returns the number of packets actually written. For each packet, the port scheduler
+ * queue to write the packet to is identified by reading the hierarchy path from the 
+ * packet descriptor; if the queue is full or congested and the packet is not written 
+ * to the queue, then the packet is automatically dropped without any action required 
+ * from the caller.
+ *
+ * @param port
+ *   Handle to port scheduler instance
+ * @param pkts
+ *   Array storing the packet descriptor handles
+ * @param n_pkts
+ *   Number of packets to enqueue from the pkts array into the port scheduler
+ * @return
+ *   Number of packets successfully enqueued
+ */
+int
+rte_sched_port_enqueue(struct rte_sched_port *port, struct rte_mbuf **pkts, uint32_t n_pkts);
+
+/**
+ * Hierarchical scheduler port dequeue. Reads up to n_pkts from the port scheduler 
+ * and stores them in the pkts array and returns the number of packets actually read. 
+ * The pkts array needs to be pre-allocated by the caller with at least n_pkts entries.
+ *
+ * @param port
+ *   Handle to port scheduler instance
+ * @param pkts
+ *   Pre-allocated packet descriptor array where the packets dequeued from the port 
+ *   scheduler should be stored
+ * @param n_pkts
+ *   Number of packets to dequeue from the port scheduler
+ * @return
+ *   Number of packets successfully dequeued and placed in the pkts array
+ */
+int
+rte_sched_port_dequeue(struct rte_sched_port *port, struct rte_mbuf **pkts, uint32_t n_pkts);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __INCLUDE_RTE_SCHED_H__ */
diff --git a/lib/librte_sched/rte_sched_common.h b/lib/librte_sched/rte_sched_common.h
new file mode 100644 (file)
index 0000000..dc76ad8
--- /dev/null
@@ -0,0 +1,130 @@
+/*-
+ *   BSD LICENSE
+ * 
+ *   Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ * 
+ *   Redistribution and use in source and binary forms, with or without 
+ *   modification, are permitted provided that the following conditions 
+ *   are met:
+ * 
+ *     * Redistributions of source code must retain the above copyright 
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright 
+ *       notice, this list of conditions and the following disclaimer in 
+ *       the documentation and/or other materials provided with the 
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its 
+ *       contributors may be used to endorse or promote products derived 
+ *       from this software without specific prior written permission.
+ * 
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * 
+ */
+
+#ifndef __INCLUDE_RTE_SCHED_COMMON_H__
+#define __INCLUDE_RTE_SCHED_COMMON_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/types.h>
+
+#define __rte_aligned_16 __attribute__((__aligned__(16)))
+
/* Return the smaller of two 32-bit unsigned values. */
static inline uint32_t
rte_sched_min_val_2_u32(uint32_t x, uint32_t y)
{
	if (x < y)
		return x;

	return y;
}
+
+#if 0
+/* Reference implementation: return the position (0 .. 3) of the smallest
+ * of four 16-bit values. Ties resolve to the lowest index. */
+static inline uint32_t
+rte_min_pos_4_u16(uint16_t *x)
+{
+       uint32_t pos0, pos1;
+               
+       pos0 = (x[0] <= x[1])? 0 : 1;
+       pos1 = (x[2] <= x[3])? 2 : 3;
+
+       return (x[pos0] <= x[pos1])? pos0 : pos1;
+}
+
+#else
+
+/* simplified version to remove branches with CMOV instruction */
+/* NOTE(review): unlike the reference variant above, ties here resolve to
+ * the HIGHER index (the `<=` comparisons overwrite on equality). Both
+ * variants return the position of some minimum value, so callers that only
+ * need "a" minimum are unaffected. */
+static inline uint32_t
+rte_min_pos_4_u16(uint16_t *x)
+{
+       uint32_t pos0 = 0;
+       uint32_t pos1 = 2;
+
+       if (x[1] <= x[0]) pos0 = 1;
+       if (x[3] <= x[2]) pos1 = 3;
+       if (x[pos1] <= x[pos0]) pos0 = pos1;
+
+       return pos0;
+}
+
+#endif
+
/*
 * Compute the Greatest Common Divisor (GCD) of two numbers using Euclid's
 * algorithm:
 *    gcd(a, 0) = a
 *    gcd(a, b) = gcd(b, a mod b)
 *
 * Note: no explicit operand swap is needed when a < b, because the first
 * remainder step (a % b == a) exchanges the operands automatically. The
 * zero cases fall out of the same loop: gcd(0, b) = b, gcd(a, 0) = a.
 */
static inline uint32_t
rte_get_gcd(uint32_t a, uint32_t b)
{
	while (b != 0) {
		uint32_t r = a % b;

		a = b;
		b = r;
	}

	return a;
}
+
/*
 * Compute the Least Common Multiple of two numbers (function named "lcd"
 * for historical reasons). Computed as (a / gcd(a, b)) * b rather than the
 * naive (a * b) / gcd(a, b): dividing first avoids the intermediate 32-bit
 * overflow of a * b for large operands whose LCM still fits in uint32_t.
 *
 * @param a First operand
 * @param b Second operand
 * @return LCM of a and b; 0 when either operand is 0 (this also avoids the
 *   division by zero that gcd(0, 0) == 0 would otherwise trigger).
 */
static inline uint32_t
rte_get_lcd(uint32_t a, uint32_t b)
{
	if ((a == 0) || (b == 0))
		return 0;

	return (a / rte_get_gcd(a, b)) * b;
}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __INCLUDE_RTE_SCHED_COMMON_H__ */
index 8eb45d8..4b80255 100644 (file)
@@ -105,6 +105,12 @@ ifeq ($(CONFIG_RTE_LIBRTE_METER),y)
 LDLIBS += -lrte_meter
 endif
 
+ifeq ($(CONFIG_RTE_LIBRTE_SCHED),y)
+LDLIBS += -lrte_sched
+LDLIBS += -lm
+LDLIBS += -lrt
+endif
+
 LDLIBS += --start-group
 
 ifeq ($(CONFIG_RTE_LIBRTE_ETHER),y)