From de3cfa2c9823a7e0391d4f4a355e2d78b83eec62 Mon Sep 17 00:00:00 2001 From: Intel Date: Mon, 3 Jun 2013 00:00:00 +0000 Subject: [PATCH] sched: initial import Signed-off-by: Intel --- app/test/Makefile | 2 + app/test/autotest_data.py | 16 +- app/test/commands.c | 6 +- app/test/test.h | 2 + app/test/test_red.c | 1890 ++++++++++++++++ app/test/test_sched.c | 244 ++ config/defconfig_i686-default-linuxapp-gcc | 10 + config/defconfig_i686-default-linuxapp-icc | 10 + config/defconfig_x86_64-default-linuxapp-gcc | 11 + config/defconfig_x86_64-default-linuxapp-icc | 10 + examples/qos_sched/Makefile | 58 + examples/qos_sched/app_thread.c | 302 +++ examples/qos_sched/args.c | 467 ++++ examples/qos_sched/cfg_file.c | 631 ++++++ examples/qos_sched/cfg_file.h | 103 + examples/qos_sched/init.c | 385 ++++ examples/qos_sched/main.c | 246 ++ examples/qos_sched/main.h | 186 ++ examples/qos_sched/profile.cfg | 109 + lib/Makefile | 1 + lib/librte_eal/common/include/rte_log.h | 1 + lib/librte_mbuf/rte_mbuf.h | 1 + lib/librte_sched/Makefile | 56 + lib/librte_sched/rte_approx.c | 197 ++ lib/librte_sched/rte_approx.h | 76 + lib/librte_sched/rte_bitmap.h | 505 +++++ lib/librte_sched/rte_red.c | 160 ++ lib/librte_sched/rte_red.h | 454 ++++ lib/librte_sched/rte_sched.c | 2129 ++++++++++++++++++ lib/librte_sched/rte_sched.h | 446 ++++ lib/librte_sched/rte_sched_common.h | 130 ++ mk/rte.app.mk | 6 + 32 files changed, 8847 insertions(+), 3 deletions(-) create mode 100644 app/test/test_red.c create mode 100755 app/test/test_sched.c create mode 100755 examples/qos_sched/Makefile create mode 100755 examples/qos_sched/app_thread.c create mode 100755 examples/qos_sched/args.c create mode 100755 examples/qos_sched/cfg_file.c create mode 100755 examples/qos_sched/cfg_file.h create mode 100755 examples/qos_sched/init.c create mode 100755 examples/qos_sched/main.c create mode 100755 examples/qos_sched/main.h create mode 100644 examples/qos_sched/profile.cfg create mode 100644 lib/librte_sched/Makefile create mode 100644 lib/librte_sched/rte_approx.c create mode 100644 lib/librte_sched/rte_approx.h create mode 100644 lib/librte_sched/rte_bitmap.h create mode 100644 lib/librte_sched/rte_red.c create mode 100644 lib/librte_sched/rte_red.h create mode 100644 lib/librte_sched/rte_sched.c create mode 100644 lib/librte_sched/rte_sched.h create mode 100644 lib/librte_sched/rte_sched_common.h diff --git a/app/test/Makefile b/app/test/Makefile index 6ba18e44e2..4fecdb6258 100755 --- a/app/test/Makefile +++ b/app/test/Makefile @@ -85,6 +85,8 @@ SRCS-$(CONFIG_RTE_APP_TEST) += test_cmdline_ipaddr.c SRCS-$(CONFIG_RTE_APP_TEST) += test_cmdline_cirbuf.c SRCS-$(CONFIG_RTE_APP_TEST) += test_cmdline_string.c SRCS-$(CONFIG_RTE_APP_TEST) += test_cmdline_lib.c +SRCS-$(CONFIG_RTE_APP_TEST) += test_red.c +SRCS-$(CONFIG_RTE_APP_TEST) += test_sched.c SRCS-$(CONFIG_RTE_APP_TEST) += test_meter.c SRCS-$(CONFIG_RTE_APP_TEST) += test_pmac_pm.c SRCS-$(CONFIG_RTE_APP_TEST) += test_pmac_acl.c diff --git a/app/test/autotest_data.py b/app/test/autotest_data.py index 9bd436bf9c..f2f9965d31 100755 --- a/app/test/autotest_data.py +++ b/app/test/autotest_data.py @@ -271,7 +271,7 @@ parallel_test_group_list = [ }, { "Prefix": "group_6", - "Memory" : all_sockets(588), + "Memory" : all_sockets(600), "Tests" : [ { @@ -297,7 +297,13 @@ parallel_test_group_list = [ "Command" : "prefetch_autotest", "Func" : default_autotest, "Report" : None, - }, + }, + { + "Name" :"Red autotest", + "Command" : "red_autotest", + "Func" :default_autotest, + "Report" :None, + }, ] }, { @@ 
-317,6 +323,12 @@ parallel_test_group_list = [ "Func" : default_autotest, "Report" : None, }, + { + "Name" :"Sched autotest", + "Command" : "sched_autotest", + "Func" :default_autotest, + "Report" :None, + }, ] }, ] diff --git a/app/test/commands.c b/app/test/commands.c index 2438433cb6..c7ac1e423a 100755 --- a/app/test/commands.c +++ b/app/test/commands.c @@ -167,6 +167,10 @@ static void cmd_autotest_parsed(void *parsed_result, ret |= test_memcpy_perf(); if (all || !strcmp(res->autotest, "func_reentrancy_autotest")) ret |= test_func_reentrancy(); + if (all || !strcmp(res->autotest, "red_autotest")) + ret |= test_red(); + if (all || !strcmp(res->autotest, "sched_autotest")) + ret |= test_sched(); if (all || !strcmp(res->autotest, "meter_autotest")) ret |= test_meter(); if (all || !strcmp(res->autotest, "pm_autotest")) @@ -203,7 +207,7 @@ cmdline_parse_token_string_t cmd_autotest_autotest = "version_autotest#eal_fs_autotest#" "cmdline_autotest#func_reentrancy_autotest#" "mempool_perf_autotest#hash_perf_autotest#" - "meter_autotest#" + "red_autotest#meter_autotest#sched_autotest#" "memcpy_perf_autotest#pm_autotest#" "acl_autotest#power_autotest#" "all_autotests"); diff --git a/app/test/test.h b/app/test/test.h index 75df8d0064..6bac20943f 100755 --- a/app/test/test.h +++ b/app/test/test.h @@ -84,6 +84,8 @@ int test_version(void); int test_eal_fs(void); int test_cmdline(void); int test_func_reentrancy(void); +int test_red(void); +int test_sched(void); int test_meter(void); int test_pmac_pm(void); int test_pmac_acl(void); diff --git a/app/test/test_red.c b/app/test/test_red.c new file mode 100644 index 0000000000..f0833494a5 --- /dev/null +++ b/app/test/test_red.c @@ -0,0 +1,1890 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2013 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ *
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/time.h>
+#include <time.h>
+#include <math.h>
+
+#include "test.h"
+
+#ifdef RTE_LIBRTE_SCHED
+
+#include <rte_red.h>
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:2259) /* conversion may lose significant bits */
+#pragma warning(disable:181) /* Arg incompatible with format string */
+#endif
+
+#define DIM(x) (sizeof(x)/sizeof(x[0]))
+#define TEST_HZ_PER_KHZ 1000
+#define TEST_NSEC_MARGIN 500 /**< nanosecond margin when calculating clk freq */
+
+#define MAX_QEMPTY_TIME_MSEC 50000
+#define MSEC_PER_SEC 1000 /**< Milli-seconds per second */
+#define USEC_PER_MSEC 1000 /**< Micro-seconds per milli-second */
+#define USEC_PER_SEC 1000000 /**< Micro-seconds per second */
+
+/** structures for testing rte_red performance and function */
+struct test_rte_red_config { /**< Test structure for RTE_RED config */
+	struct rte_red_config *rconfig; /**< RTE_RED configuration parameters */
+	uint8_t num_cfg;                /**< Number of RTE_RED configs to test */
+	uint8_t *wq_log2;               /**< Test wq_log2 value to use */
+	uint32_t min_th;                /**< Queue minimum threshold */
+	uint32_t max_th;                /**< Queue maximum threshold */
+	uint8_t *maxp_inv;              /**< Inverse mark probability */
+};
+
+struct test_queue { /**< Test structure for RTE_RED Queues */
+	struct rte_red *rdata;          /**< RTE_RED runtime data */
+	uint32_t num_queues;            /**< Number of RTE_RED queues to test */
+	uint32_t *qconfig;              /**< Configuration of RTE_RED queues for test */
+	uint32_t *q;                    /**< Queue size */
+	uint32_t q_ramp_up;             /**< Num of enqueues to ramp up the queue */
+	uint32_t avg_ramp_up;           /**< Average num of enqueues to ramp up the queue */
+	uint32_t avg_tolerance;         /**< Tolerance in queue average */
+	double drop_tolerance;          /**< Drop tolerance of packets not enqueued */
+};
+
+struct test_var { /**< Test variables used for testing RTE_RED */
+	uint32_t wait_usec;             /**< Micro second wait interval */
+	uint32_t num_iterations;        /**< Number of test iterations */
+	uint32_t num_ops;               /**< Number of test operations */
+	uint64_t clk_freq;              /**< CPU clock frequency */
+	uint32_t sleep_sec;             /**< Seconds to sleep */
+	uint32_t *dropped;              /**< Test operations dropped */
+	uint32_t *enqueued;             /**< Test operations enqueued */
+};
+
+struct test_config { /**< Master test structure for RTE_RED */
+	const char *ifname;             /**< Interface name */
+	const char *msg;                /**< Test message for display */
+	const char *htxt;               /**< Header txt display for result output */
+	struct test_rte_red_config *tconfig; /**< Test structure for RTE_RED config */
+	struct test_queue *tqueue;      /**< Test structure for RTE_RED Queues */
+	struct test_var *tvar;          /**< Test variables used for testing RTE_RED */
+	uint32_t *tlevel;               /**< Queue levels */
+};
+
+enum test_result {
+	FAIL = 0,
+	PASS
+};
+
+/** Test structure to define tests to run */
+struct tests {
+	struct test_config *testcfg;
+	enum test_result (*testfn)(struct test_config *);
+};
+
+struct rdtsc_prof {
+	uint64_t clk_start;
+	uint64_t clk_min;               /**< min clocks */
+	uint64_t clk_max;               /**< max clocks */
+	uint64_t clk_avgc;              /**< count to calc average */
+	double clk_avg;                 /**< cumulative sum to calc average */
+	const char *name;
+};
+
+static const uint64_t port_speed_bytes = (10ULL*1000ULL*1000ULL*1000ULL)/8ULL;
+static double inv_cycles_per_byte = 0;
+static double pkt_time_usec = 0;
+
+static void init_port_ts(uint64_t cpu_clock)
+{
+	double cycles_per_byte = (double)(cpu_clock) / (double)(port_speed_bytes);
+	inv_cycles_per_byte = 1.0 / cycles_per_byte;
+	pkt_time_usec = 1000000.0 /
((double)port_speed_bytes / (double)RTE_RED_S); +} + +static uint64_t get_port_ts(void) +{ + return (uint64_t)((double)rte_rdtsc() * inv_cycles_per_byte); +} + +static void rdtsc_prof_init(struct rdtsc_prof *p, const char *name) +{ + p->clk_min = (uint64_t)(-1LL); + p->clk_max = 0; + p->clk_avg = 0; + p->clk_avgc = 0; + p->name = name; +} + +static inline void rdtsc_prof_start(struct rdtsc_prof *p) +{ + asm( "cpuid" : : : "%eax", "%ebx", "%ecx", "%edx" ); + p->clk_start = rte_rdtsc(); +} + +static inline void rdtsc_prof_end(struct rdtsc_prof *p) +{ + uint64_t clk_start = rte_rdtsc() - p->clk_start; + + p->clk_avgc++; + p->clk_avg += (double) clk_start; + + if (clk_start > p->clk_max) + p->clk_max = clk_start; + if (clk_start < p->clk_min) + p->clk_min = clk_start; +} + +static void rdtsc_prof_print(struct rdtsc_prof *p) +{ + if (p->clk_avgc>0) { + printf("RDTSC stats for %s: n=%" PRIu64 ", min=%" PRIu64 ", max=%" PRIu64 ", avg=%.1f\n", + p->name, + p->clk_avgc, + p->clk_min, + p->clk_max, + (p->clk_avg / ((double) p->clk_avgc))); + } +} + +static uint32_t rte_red_get_avg_int(const struct rte_red_config *red_cfg, + struct rte_red *red) +{ + /** + * scale by 1/n and convert from fixed-point to integer + */ + return red->avg >> (RTE_RED_SCALING + red_cfg->wq_log2); +} + +static double rte_red_get_avg_float(const struct rte_red_config *red_cfg, + struct rte_red *red) +{ + /** + * scale by 1/n and convert from fixed-point to floating-point + */ + return ldexp((double)red->avg, -(RTE_RED_SCALING + red_cfg->wq_log2)); +} + +static void rte_red_set_avg_int(const struct rte_red_config *red_cfg, + struct rte_red *red, + uint32_t avg) +{ + /** + * scale by n and convert from integer to fixed-point + */ + red->avg = avg << (RTE_RED_SCALING + red_cfg->wq_log2); +} + +static double calc_exp_avg_on_empty(double avg, uint32_t n, uint32_t time_diff) +{ + return avg * pow((1.0 - 1.0 / (double)n), (double)time_diff / pkt_time_usec); +} + +static double calc_drop_rate(uint32_t enqueued, uint32_t dropped) +{ + return (double)dropped / ((double)enqueued + (double)dropped); +} + +/** + * calculate the drop probability + */ +static double calc_drop_prob(uint32_t min_th, uint32_t max_th, + uint32_t maxp_inv, uint32_t avg) +{ + double drop_prob = 0.0; + + if (avg < min_th) { + drop_prob = 0.0; + } else if (avg < max_th) { + drop_prob = (1.0 / (double)maxp_inv) + * ((double)(avg - min_th) + / (double)(max_th - min_th)); + } else { + drop_prob = 1.0; + } + return (drop_prob); +} + +/** + * check if drop rate matches drop probability within tolerance + */ +static int check_drop_rate(double *diff, double drop_rate, double drop_prob, double tolerance) +{ + double abs_diff = 0.0; + int ret = 1; + + abs_diff = fabs(drop_rate - drop_prob); + if ((int)abs_diff == 0) { + *diff = 0.0; + } else { + *diff = (abs_diff / drop_prob) * 100.0; + if (*diff > tolerance) { + ret = 0; + } + } + return (ret); +} + +/** + * check if average queue size is within tolerance + */ +static int check_avg(double *diff, double avg, double exp_avg, double tolerance) +{ + double abs_diff = 0.0; + int ret = 1; + + abs_diff = fabs(avg - exp_avg); + if ((int)abs_diff == 0) { + *diff = 0.0; + } else { + *diff = (abs_diff / exp_avg) * 100.0; + if (*diff > tolerance) { + ret = 0; + } + } + return (ret); +} + +/** + * get the clk frequency in Hz + */ +static uint64_t get_machclk_freq(void) +{ + uint64_t start = 0; + uint64_t end = 0; + uint64_t diff = 0; + uint64_t clk_freq_hz = 0; + struct timespec tv_start = {0, 0}, tv_end = {0, 0}; + struct timespec req 
= {0, 0}; + + req.tv_sec = 1; + req.tv_nsec = 0; + + clock_gettime(CLOCK_REALTIME, &tv_start); + start = rte_rdtsc(); + + if (nanosleep(&req, NULL) != 0) { + perror("get_machclk_freq()"); + exit(EXIT_FAILURE); + } + + clock_gettime(CLOCK_REALTIME, &tv_end); + end = rte_rdtsc(); + + diff = (uint64_t)(tv_end.tv_sec - tv_start.tv_sec) * USEC_PER_SEC + + ((tv_end.tv_nsec - tv_start.tv_nsec + TEST_NSEC_MARGIN) / + USEC_PER_MSEC); /**< diff is in micro secs */ + + if (diff == 0) + return(0); + + clk_freq_hz = ((end - start) * USEC_PER_SEC / diff); + return (clk_freq_hz); +} + +/** + * initialize the test rte_red config + */ +static enum test_result +test_rte_red_init(struct test_config *tcfg) +{ + unsigned i = 0; + + tcfg->tvar->clk_freq = get_machclk_freq(); + init_port_ts( tcfg->tvar->clk_freq ); + + for (i = 0; i < tcfg->tconfig->num_cfg; i++) { + if (rte_red_config_init(&tcfg->tconfig->rconfig[i], + (uint16_t)tcfg->tconfig->wq_log2[i], + (uint16_t)tcfg->tconfig->min_th, + (uint16_t)tcfg->tconfig->max_th, + (uint16_t)tcfg->tconfig->maxp_inv[i]) != 0) { + return(FAIL); + } + } + + *tcfg->tqueue->q = 0; + *tcfg->tvar->dropped = 0; + *tcfg->tvar->enqueued = 0; + return(PASS); +} + +/** + * enqueue until actual queue size reaches target level + */ +static int +increase_actual_qsize(struct rte_red_config *red_cfg, + struct rte_red *red, + uint32_t *q, + uint32_t level, + uint32_t attempts) +{ + uint32_t i = 0; + + for (i = 0; i < attempts; i++) { + int ret = 0; + + /** + * enqueue + */ + ret = rte_red_enqueue(red_cfg, red, *q, get_port_ts() ); + if (ret == 0) { + if (++(*q) >= level) + break; + } + } + /** + * check if target actual queue size has been reached + */ + if (*q != level) + return (-1); + /** + * success + */ + return (0); +} + +/** + * enqueue until average queue size reaches target level + */ +static int +increase_average_qsize(struct rte_red_config *red_cfg, + struct rte_red *red, + uint32_t *q, + uint32_t level, + uint32_t num_ops) +{ + uint32_t avg = 0; + uint32_t i = 0; + + for (i = 0; i < num_ops; i++) { + /** + * enqueue + */ + rte_red_enqueue(red_cfg, red, *q, get_port_ts()); + } + /** + * check if target average queue size has been reached + */ + avg = rte_red_get_avg_int(red_cfg, red); + if (avg != level) + return (-1); + /** + * success + */ + return (0); +} + +/** + * setup default values for the functional test structures + */ +static struct rte_red_config ft_wrconfig[1]; +static struct rte_red ft_rtdata[1]; +static uint8_t ft_wq_log2[] = {9}; +static uint8_t ft_maxp_inv[] = {10}; +static uint32_t ft_qconfig[] = {0, 0, 1, 1}; +static uint32_t ft_q[] ={0}; +static uint32_t ft_dropped[] ={0}; +static uint32_t ft_enqueued[] ={0}; + +static struct test_rte_red_config ft_tconfig = { + .rconfig = ft_wrconfig, + .num_cfg = DIM(ft_wrconfig), + .wq_log2 = ft_wq_log2, + .min_th = 32, + .max_th = 128, + .maxp_inv = ft_maxp_inv, +}; + +static struct test_queue ft_tqueue = { + .rdata = ft_rtdata, + .num_queues = DIM(ft_rtdata), + .qconfig = ft_qconfig, + .q = ft_q, + .q_ramp_up = 1000000, + .avg_ramp_up = 1000000, + .avg_tolerance = 5, /* 5 percent */ + .drop_tolerance = 50, /* 50 percent */ +}; + +static struct test_var ft_tvar = { + .wait_usec = 250000, + .num_iterations = 20, + .num_ops = 10000, + .clk_freq = 0, + .dropped = ft_dropped, + .enqueued = ft_enqueued, + .sleep_sec = (MAX_QEMPTY_TIME_MSEC / MSEC_PER_SEC) + 2, +}; + +/** + * functional test enqueue/dequeue packets + */ +static void enqueue_dequeue_func(struct rte_red_config *red_cfg, + struct rte_red *red, + uint32_t *q, 
+ uint32_t num_ops, + uint32_t *enqueued, + uint32_t *dropped) +{ + uint32_t i = 0; + + for (i = 0; i < num_ops; i++) { + int ret = 0; + + /** + * enqueue + */ + ret = rte_red_enqueue(red_cfg, red, *q, get_port_ts()); + if (ret == 0) + (*enqueued)++; + else + (*dropped)++; + } +} + +/** + * Test F1: functional test 1 + */ +static uint32_t ft1_tlevels[] = {6, 12, 18, 24, 30, 36, 42, 48, 54, 60, 66, 72, 78, 84, 90, 96, 102, 108, 114, 120, 126, 132, 138, 144}; + +static struct test_config func_test1_config = { + .ifname = "functional test 1 interface", + .msg = "functional test 1 : use one rte_red configuration,\n" + " increase average queue size to various levels,\n" + " compare drop rate to drop probability\n\n", + .htxt = " " + "avg queue size " + "enqueued " + "dropped " + "drop prob % " + "drop rate % " + "diff % " + "tolerance % " + "\n", + .tconfig = &ft_tconfig, + .tqueue = &ft_tqueue, + .tvar = &ft_tvar, + .tlevel = ft1_tlevels, +}; + +static enum test_result func_test1(struct test_config *tcfg) +{ + enum test_result result = PASS; + uint32_t i = 0; + + printf("%s", tcfg->msg); + + if (test_rte_red_init(tcfg) != PASS) { + result = FAIL; + goto out; + } + + printf("%s", tcfg->htxt); + + for (i = 0; i < DIM(ft1_tlevels); i++) { + const char *label = NULL; + uint32_t avg = 0; + double drop_rate = 0.0; + double drop_prob = 0.0; + double diff = 0.0; + + /** + * reset rte_red run-time data + */ + rte_red_rt_data_init(tcfg->tqueue->rdata); + *tcfg->tvar->enqueued = 0; + *tcfg->tvar->dropped = 0; + + if (increase_actual_qsize(tcfg->tconfig->rconfig, + tcfg->tqueue->rdata, + tcfg->tqueue->q, + tcfg->tlevel[i], + tcfg->tqueue->q_ramp_up) != 0) { + result = FAIL; + goto out; + } + + if (increase_average_qsize(tcfg->tconfig->rconfig, + tcfg->tqueue->rdata, + tcfg->tqueue->q, + tcfg->tlevel[i], + tcfg->tqueue->avg_ramp_up) != 0) { + result = FAIL; + goto out; + } + + enqueue_dequeue_func(tcfg->tconfig->rconfig, + tcfg->tqueue->rdata, + tcfg->tqueue->q, + tcfg->tvar->num_ops, + tcfg->tvar->enqueued, + tcfg->tvar->dropped); + + avg = rte_red_get_avg_int(tcfg->tconfig->rconfig, tcfg->tqueue->rdata); + if (avg != tcfg->tlevel[i]) { + fprintf(stderr, "Fail: avg != level\n"); + result = FAIL; + } + + drop_rate = calc_drop_rate(*tcfg->tvar->enqueued, *tcfg->tvar->dropped); + drop_prob = calc_drop_prob(tcfg->tconfig->min_th, tcfg->tconfig->max_th, + *tcfg->tconfig->maxp_inv, tcfg->tlevel[i]); + if (!check_drop_rate(&diff, drop_rate, drop_prob, (double)tcfg->tqueue->drop_tolerance)) + result = FAIL; + + if (tcfg->tlevel[i] == tcfg->tconfig->min_th) + label = "min thresh: "; + else if (tcfg->tlevel[i] == tcfg->tconfig->max_th) + label = "max thresh: "; + else + label = " "; + printf("%s%-15u%-15u%-15u%-15.4lf%-15.4lf%-15.4lf%-15.4lf\n", + label, avg, *tcfg->tvar->enqueued, *tcfg->tvar->dropped, + drop_prob * 100.0, drop_rate * 100.0, diff, + (double)tcfg->tqueue->drop_tolerance); + } +out: + return (result); +} + +/** + * Test F2: functional test 2 + */ +static uint32_t ft2_tlevel[] = {127}; +static uint8_t ft2_wq_log2[] = {9, 9, 9, 9, 9, 9, 9, 9, 9, 9}; +static uint8_t ft2_maxp_inv[] = {10, 20, 30, 40, 50, 60, 70, 80, 90, 100}; +static struct rte_red_config ft2_rconfig[10]; + +static struct test_rte_red_config ft2_tconfig = { + .rconfig = ft2_rconfig, + .num_cfg = DIM(ft2_rconfig), + .wq_log2 = ft2_wq_log2, + .min_th = 32, + .max_th = 128, + .maxp_inv = ft2_maxp_inv, +}; + +static struct test_config func_test2_config = { + .ifname = "functional test 2 interface", + .msg = "functional test 2 : use several 
RED configurations,\n" + " increase average queue size to just below maximum threshold,\n" + " compare drop rate to drop probability\n\n", + .htxt = "RED config " + "avg queue size " + "min threshold " + "max threshold " + "drop prob % " + "drop rate % " + "diff % " + "tolerance % " + "\n", + .tconfig = &ft2_tconfig, + .tqueue = &ft_tqueue, + .tvar = &ft_tvar, + .tlevel = ft2_tlevel, +}; + +static enum test_result func_test2(struct test_config *tcfg) +{ + enum test_result result = PASS; + double prev_drop_rate = 1.0; + uint32_t i = 0; + + printf("%s", tcfg->msg); + + if (test_rte_red_init(tcfg) != PASS) { + result = FAIL; + goto out; + } + rte_red_rt_data_init(tcfg->tqueue->rdata); + + if (increase_actual_qsize(tcfg->tconfig->rconfig, + tcfg->tqueue->rdata, + tcfg->tqueue->q, + *tcfg->tlevel, + tcfg->tqueue->q_ramp_up) != 0) { + result = FAIL; + goto out; + } + + if (increase_average_qsize(tcfg->tconfig->rconfig, + tcfg->tqueue->rdata, + tcfg->tqueue->q, + *tcfg->tlevel, + tcfg->tqueue->avg_ramp_up) != 0) { + result = FAIL; + goto out; + } + printf("%s", tcfg->htxt); + + for (i = 0; i < tcfg->tconfig->num_cfg; i++) { + uint32_t avg = 0; + double drop_rate = 0.0; + double drop_prob = 0.0; + double diff = 0.0; + + *tcfg->tvar->dropped = 0; + *tcfg->tvar->enqueued = 0; + + enqueue_dequeue_func(&tcfg->tconfig->rconfig[i], + tcfg->tqueue->rdata, + tcfg->tqueue->q, + tcfg->tvar->num_ops, + tcfg->tvar->enqueued, + tcfg->tvar->dropped); + + avg = rte_red_get_avg_int(&tcfg->tconfig->rconfig[i], tcfg->tqueue->rdata); + if (avg != *tcfg->tlevel) + result = FAIL; + + drop_rate = calc_drop_rate(*tcfg->tvar->enqueued, *tcfg->tvar->dropped); + drop_prob = calc_drop_prob(tcfg->tconfig->min_th, tcfg->tconfig->max_th, + tcfg->tconfig->maxp_inv[i], *tcfg->tlevel); + if (!check_drop_rate(&diff, drop_rate, drop_prob, (double)tcfg->tqueue->drop_tolerance)) + result = FAIL; + /** + * drop rate should decrease as maxp_inv increases + */ + if (drop_rate > prev_drop_rate) + result = FAIL; + prev_drop_rate = drop_rate; + + printf("%-15u%-15u%-15u%-15u%-15.4lf%-15.4lf%-15.4lf%-15.4lf\n", + i, avg, tcfg->tconfig->min_th, tcfg->tconfig->max_th, + drop_prob * 100.0, drop_rate * 100.0, diff, + (double)tcfg->tqueue->drop_tolerance); + } +out: + return (result); +} + +/** + * Test F3: functional test 3 + */ +static uint32_t ft3_tlevel[] = {1022}; + +static struct test_rte_red_config ft3_tconfig = { + .rconfig = ft_wrconfig, + .num_cfg = DIM(ft_wrconfig), + .wq_log2 = ft_wq_log2, + .min_th = 32, + .max_th = 1023, + .maxp_inv = ft_maxp_inv, +}; + +static struct test_config func_test3_config = { + .ifname = "functional test 3 interface", + .msg = "functional test 3 : use one RED configuration,\n" + " increase average queue size to target level,\n" + " dequeue all packets until queue is empty,\n" + " confirm that average queue size is computed correctly while queue is empty\n\n", + .htxt = "q avg before " + "q avg after " + "expected " + "difference % " + "tolerance % " + "result " + "\n", + .tconfig = &ft3_tconfig, + .tqueue = &ft_tqueue, + .tvar = &ft_tvar, + .tlevel = ft3_tlevel, +}; + +static enum test_result func_test3(struct test_config *tcfg) +{ + enum test_result result = PASS; + uint32_t i = 0; + + printf("%s", tcfg->msg); + + if (test_rte_red_init(tcfg) != PASS) { + result = FAIL; + goto out; + } + + rte_red_rt_data_init(tcfg->tqueue->rdata); + + if (increase_actual_qsize(tcfg->tconfig->rconfig, + tcfg->tqueue->rdata, + tcfg->tqueue->q, + *tcfg->tlevel, + tcfg->tqueue->q_ramp_up) != 0) { + result = FAIL; + goto out; 
+ } + + if (increase_average_qsize(tcfg->tconfig->rconfig, + tcfg->tqueue->rdata, + tcfg->tqueue->q, + *tcfg->tlevel, + tcfg->tqueue->avg_ramp_up) != 0) { + result = FAIL; + goto out; + } + + printf("%s", tcfg->htxt); + + for (i = 0; i < tcfg->tvar->num_iterations; i++) { + double avg_before = 0; + double avg_after = 0; + double exp_avg = 0; + double diff = 0.0; + + avg_before = rte_red_get_avg_float(tcfg->tconfig->rconfig, tcfg->tqueue->rdata); + + /** + * empty the queue + */ + *tcfg->tqueue->q = 0; + rte_red_mark_queue_empty(tcfg->tqueue->rdata, get_port_ts()); + + rte_delay_us(tcfg->tvar->wait_usec); + + /** + * enqueue one packet to recalculate average queue size + */ + if (rte_red_enqueue(tcfg->tconfig->rconfig, + tcfg->tqueue->rdata, + *tcfg->tqueue->q, + get_port_ts()) == 0) { + (*tcfg->tqueue->q)++; + } else { + printf("%s:%d: packet enqueued on empty queue was dropped\n", __func__, __LINE__); + result = FAIL; + } + + exp_avg = calc_exp_avg_on_empty(avg_before, + (1 << *tcfg->tconfig->wq_log2), + tcfg->tvar->wait_usec); + avg_after = rte_red_get_avg_float(tcfg->tconfig->rconfig, + tcfg->tqueue->rdata); + if (!check_avg(&diff, avg_after, exp_avg, (double)tcfg->tqueue->avg_tolerance)) + result = FAIL; + + printf("%-15.4lf%-15.4lf%-15.4lf%-15.4lf%-15.4lf%-15s\n", + avg_before, avg_after, exp_avg, diff, + (double)tcfg->tqueue->avg_tolerance, + diff <= (double)tcfg->tqueue->avg_tolerance ? "pass" : "fail"); + } +out: + return (result); +} + +/** + * Test F4: functional test 4 + */ +static uint32_t ft4_tlevel[] = {1022}; +static uint8_t ft4_wq_log2[] = {11}; + +static struct test_rte_red_config ft4_tconfig = { + .rconfig = ft_wrconfig, + .num_cfg = DIM(ft_wrconfig), + .min_th = 32, + .max_th = 1023, + .wq_log2 = ft4_wq_log2, + .maxp_inv = ft_maxp_inv, +}; + +static struct test_queue ft4_tqueue = { + .rdata = ft_rtdata, + .num_queues = DIM(ft_rtdata), + .qconfig = ft_qconfig, + .q = ft_q, + .q_ramp_up = 1000000, + .avg_ramp_up = 1000000, + .avg_tolerance = 0, /* 0 percent */ + .drop_tolerance = 50, /* 50 percent */ +}; + +static struct test_config func_test4_config = { + .ifname = "functional test 4 interface", + .msg = "functional test 4 : use one RED configuration,\n" + " increase average queue size to target level,\n" + " dequeue all packets until queue is empty,\n" + " confirm that average queue size is computed correctly while\n" + " queue is empty for more than 50 sec,\n" + " (this test takes 52 sec to run)\n\n", + .htxt = "q avg before " + "q avg after " + "expected " + "difference % " + "tolerance % " + "result " + "\n", + .tconfig = &ft4_tconfig, + .tqueue = &ft4_tqueue, + .tvar = &ft_tvar, + .tlevel = ft4_tlevel, +}; + +static enum test_result func_test4(struct test_config *tcfg) +{ + enum test_result result = PASS; + uint64_t time_diff = 0; + uint64_t start = 0; + double avg_before = 0.0; + double avg_after = 0.0; + double exp_avg = 0.0; + double diff = 0.0; + + printf("%s", tcfg->msg); + + if (test_rte_red_init(tcfg) != PASS) { + result = FAIL; + goto out; + } + + rte_red_rt_data_init(tcfg->tqueue->rdata); + + if (increase_actual_qsize(tcfg->tconfig->rconfig, + tcfg->tqueue->rdata, + tcfg->tqueue->q, + *tcfg->tlevel, + tcfg->tqueue->q_ramp_up) != 0) { + result = FAIL; + goto out; + } + + if (increase_average_qsize(tcfg->tconfig->rconfig, + tcfg->tqueue->rdata, + tcfg->tqueue->q, + *tcfg->tlevel, + tcfg->tqueue->avg_ramp_up) != 0) { + result = FAIL; + goto out; + } + + printf("%s", tcfg->htxt); + + avg_before = rte_red_get_avg_float(tcfg->tconfig->rconfig, 
tcfg->tqueue->rdata); + + /** + * empty the queue + */ + *tcfg->tqueue->q = 0; + rte_red_mark_queue_empty(tcfg->tqueue->rdata, get_port_ts()); + + /** + * record empty time locally + */ + start = rte_rdtsc(); + + sleep(tcfg->tvar->sleep_sec); + + /** + * enqueue one packet to recalculate average queue size + */ + if (rte_red_enqueue(tcfg->tconfig->rconfig, + tcfg->tqueue->rdata, + *tcfg->tqueue->q, + get_port_ts()) != 0) { + result = FAIL; + goto out; + } + (*tcfg->tqueue->q)++; + + /** + * calculate how long queue has been empty + */ + time_diff = ((rte_rdtsc() - start) / tcfg->tvar->clk_freq) + * MSEC_PER_SEC; + if (time_diff < MAX_QEMPTY_TIME_MSEC) { + /** + * this could happen if sleep was interrupted for some reason + */ + result = FAIL; + goto out; + } + + /** + * confirm that average queue size is now at expected level + */ + exp_avg = 0.0; + avg_after = rte_red_get_avg_float(tcfg->tconfig->rconfig, tcfg->tqueue->rdata); + if (!check_avg(&diff, avg_after, exp_avg, (double)tcfg->tqueue->avg_tolerance)) + result = FAIL; + + printf("%-15.4lf%-15.4lf%-15.4lf%-15.4lf%-15.4lf%-15s\n", + avg_before, avg_after, exp_avg, + diff, (double)tcfg->tqueue->avg_tolerance, + diff <= (double)tcfg->tqueue->avg_tolerance ? "pass" : "fail"); +out: + return (result); +} + +/** + * Test F5: functional test 5 + */ +static uint32_t ft5_tlevel[] = {127}; +static uint8_t ft5_wq_log2[] = {9, 8}; +static uint8_t ft5_maxp_inv[] = {10, 20}; +static struct rte_red_config ft5_config[2]; +static struct rte_red ft5_data[4]; +static uint32_t ft5_q[4]; +static uint32_t ft5_dropped[] = {0, 0, 0, 0}; +static uint32_t ft5_enqueued[] = {0, 0, 0, 0}; + +static struct test_rte_red_config ft5_tconfig = { + .rconfig = ft5_config, + .num_cfg = DIM(ft5_config), + .min_th = 32, + .max_th = 128, + .wq_log2 = ft5_wq_log2, + .maxp_inv = ft5_maxp_inv, +}; + +static struct test_queue ft5_tqueue = { + .rdata = ft5_data, + .num_queues = DIM(ft5_data), + .qconfig = ft_qconfig, + .q = ft5_q, + .q_ramp_up = 1000000, + .avg_ramp_up = 1000000, + .avg_tolerance = 5, /* 10 percent */ + .drop_tolerance = 50, /* 50 percent */ +}; + +struct test_var ft5_tvar = { + .wait_usec = 0, + .num_iterations = 15, + .num_ops = 10000, + .clk_freq = 0, + .dropped = ft5_dropped, + .enqueued = ft5_enqueued, + .sleep_sec = 0, +}; + +static struct test_config func_test5_config = { + .ifname = "functional test 5 interface", + .msg = "functional test 5 : use several queues (each with its own run-time data),\n" + " use several RED configurations (such that each configuration is shared by multiple queues),\n" + " increase average queue size to just below maximum threshold,\n" + " compare drop rate to drop probability,\n" + " (this is a larger scale version of functional test 2)\n\n", + .htxt = "queue " + "config " + "avg queue size " + "min threshold " + "max threshold " + "drop prob % " + "drop rate % " + "diff % " + "tolerance % " + "\n", + .tconfig = &ft5_tconfig, + .tqueue = &ft5_tqueue, + .tvar = &ft5_tvar, + .tlevel = ft5_tlevel, +}; + +static enum test_result func_test5(struct test_config *tcfg) +{ + enum test_result result = PASS; + uint32_t j = 0; + + printf("%s", tcfg->msg); + + if (test_rte_red_init(tcfg) != PASS) { + result = FAIL; + goto out; + } + + printf("%s", tcfg->htxt); + + for (j = 0; j < tcfg->tqueue->num_queues; j++) { + rte_red_rt_data_init(&tcfg->tqueue->rdata[j]); + tcfg->tqueue->q[j] = 0; + + if (increase_actual_qsize(&tcfg->tconfig->rconfig[tcfg->tqueue->qconfig[j]], + &tcfg->tqueue->rdata[j], + &tcfg->tqueue->q[j], + *tcfg->tlevel, + 
				tcfg->tqueue->q_ramp_up) != 0) {
+			result = FAIL;
+			goto out;
+		}
+
+		if (increase_average_qsize(&tcfg->tconfig->rconfig[tcfg->tqueue->qconfig[j]],
+					&tcfg->tqueue->rdata[j],
+					&tcfg->tqueue->q[j],
+					*tcfg->tlevel,
+					tcfg->tqueue->avg_ramp_up) != 0) {
+			result = FAIL;
+			goto out;
+		}
+	}
+
+	for (j = 0; j < tcfg->tqueue->num_queues; j++) {
+		uint32_t avg = 0;
+		double drop_rate = 0.0;
+		double drop_prob = 0.0;
+		double diff = 0.0;
+
+		tcfg->tvar->dropped[j] = 0;
+		tcfg->tvar->enqueued[j] = 0;
+
+		enqueue_dequeue_func(&tcfg->tconfig->rconfig[tcfg->tqueue->qconfig[j]],
+				&tcfg->tqueue->rdata[j],
+				&tcfg->tqueue->q[j],
+				tcfg->tvar->num_ops,
+				&tcfg->tvar->enqueued[j],
+				&tcfg->tvar->dropped[j]);
+
+		avg = rte_red_get_avg_int(&tcfg->tconfig->rconfig[tcfg->tqueue->qconfig[j]],
+					&tcfg->tqueue->rdata[j]);
+		if (avg != *tcfg->tlevel)
+			result = FAIL;
+
+		drop_rate = calc_drop_rate(tcfg->tvar->enqueued[j], tcfg->tvar->dropped[j]);
+		drop_prob = calc_drop_prob(tcfg->tconfig->min_th, tcfg->tconfig->max_th,
+					tcfg->tconfig->maxp_inv[tcfg->tqueue->qconfig[j]],
+					*tcfg->tlevel);
+		if (!check_drop_rate(&diff, drop_rate, drop_prob, (double)tcfg->tqueue->drop_tolerance))
+			result = FAIL;
+
+		printf("%-15u%-15u%-15u%-15u%-15u%-15.4lf%-15.4lf%-15.4lf%-15.4lf\n",
+			j, tcfg->tqueue->qconfig[j], avg,
+			tcfg->tconfig->min_th, tcfg->tconfig->max_th,
+			drop_prob * 100.0, drop_rate * 100.0,
+			diff, (double)tcfg->tqueue->drop_tolerance);
+	}
+out:
+	return (result);
+}
+
+/**
+ * Test F6: functional test 6
+ */
+static uint32_t ft6_tlevel[] = {1022};
+static uint8_t ft6_wq_log2[] = {9, 8};
+static uint8_t ft6_maxp_inv[] = {10, 20};
+static struct rte_red_config ft6_config[2];
+static struct rte_red ft6_data[4];
+static uint32_t ft6_q[4];
+
+static struct test_rte_red_config ft6_tconfig = {
+	.rconfig = ft6_config,
+	.num_cfg = DIM(ft6_config),
+	.min_th = 32,
+	.max_th = 1023,
+	.wq_log2 = ft6_wq_log2,
+	.maxp_inv = ft6_maxp_inv,
+};
+
+static struct test_queue ft6_tqueue = {
+	.rdata = ft6_data,
+	.num_queues = DIM(ft6_data),
+	.qconfig = ft_qconfig,
+	.q = ft6_q,
+	.q_ramp_up = 1000000,
+	.avg_ramp_up = 1000000,
+	.avg_tolerance = 5, /* 5 percent */
+	.drop_tolerance = 50, /* 50 percent */
+};
+
+static struct test_config func_test6_config = {
+	.ifname = "functional test 6 interface",
+	.msg = "functional test 6 : use several queues (each with its own run-time data),\n"
+	"		use several RED configurations (such that each configuration is shared by multiple queues),\n"
+	"		increase average queue size to target level,\n"
+	"		dequeue all packets until queue is empty,\n"
+	"		confirm that average queue size is computed correctly while queue is empty\n"
+	"		(this is a larger scale version of functional test 3)\n\n",
+	.htxt = "queue          "
+	"config         "
+	"q avg before   "
+	"q avg after    "
+	"expected       "
+	"difference %   "
+	"tolerance %    "
+	"result         ""\n",
+	.tconfig = &ft6_tconfig,
+	.tqueue = &ft6_tqueue,
+	.tvar = &ft_tvar,
+	.tlevel = ft6_tlevel,
+};
+
+static enum test_result func_test6(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	uint32_t j = 0;
+
+	printf("%s", tcfg->msg);
+	if (test_rte_red_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+	printf("%s", tcfg->htxt);
+
+	for (j = 0; j < tcfg->tqueue->num_queues; j++) {
+		rte_red_rt_data_init(&tcfg->tqueue->rdata[j]);
+		tcfg->tqueue->q[j] = 0;
+
+		if (increase_actual_qsize(&tcfg->tconfig->rconfig[tcfg->tqueue->qconfig[j]],
+					&tcfg->tqueue->rdata[j],
+					&tcfg->tqueue->q[j],
+					*tcfg->tlevel,
+					tcfg->tqueue->q_ramp_up) != 0) {
+			result = FAIL;
goto out; + } + if (increase_average_qsize(&tcfg->tconfig->rconfig[tcfg->tqueue->qconfig[j]], + &tcfg->tqueue->rdata[j], + &tcfg->tqueue->q[j], + *tcfg->tlevel, + tcfg->tqueue->avg_ramp_up) != 0) { + result = FAIL; + goto out; + } + } + for (j = 0; j < tcfg->tqueue->num_queues; j++) { + double avg_before = 0; + double avg_after = 0; + double exp_avg = 0; + double diff = 0.0; + + avg_before = rte_red_get_avg_float(&tcfg->tconfig->rconfig[tcfg->tqueue->qconfig[j]], + &tcfg->tqueue->rdata[j]); + + /** + * empty the queue + */ + tcfg->tqueue->q[j] = 0; + rte_red_mark_queue_empty(&tcfg->tqueue->rdata[j], get_port_ts()); + rte_delay_us(tcfg->tvar->wait_usec); + + /** + * enqueue one packet to recalculate average queue size + */ + if (rte_red_enqueue(&tcfg->tconfig->rconfig[tcfg->tqueue->qconfig[j]], + &tcfg->tqueue->rdata[j], + tcfg->tqueue->q[j], + get_port_ts()) == 0) { + tcfg->tqueue->q[j]++; + } else { + printf("%s:%d: packet enqueued on empty queue was dropped\n", __func__, __LINE__); + result = FAIL; + } + + exp_avg = calc_exp_avg_on_empty(avg_before, + (1 << tcfg->tconfig->wq_log2[tcfg->tqueue->qconfig[j]]), + tcfg->tvar->wait_usec); + avg_after = rte_red_get_avg_float(&tcfg->tconfig->rconfig[tcfg->tqueue->qconfig[j]], + &tcfg->tqueue->rdata[j]); + if (!check_avg(&diff, avg_after, exp_avg, (double)tcfg->tqueue->avg_tolerance)) + result = FAIL; + + printf("%-15u%-15u%-15.4lf%-15.4lf%-15.4lf%-15.4lf%-15.4lf%-15s\n", + j, tcfg->tqueue->qconfig[j], avg_before, avg_after, + exp_avg, diff, (double)tcfg->tqueue->avg_tolerance, + diff <= tcfg->tqueue->avg_tolerance ? "pass" : "fail"); + } +out: + return (result); +} + +/** + * setup default values for the performance test structures + */ +static struct rte_red_config pt_wrconfig[1]; +static struct rte_red pt_rtdata[1]; +static uint8_t pt_wq_log2[] = {9}; +static uint8_t pt_maxp_inv[] = {10}; +static uint32_t pt_qconfig[] = {0}; +static uint32_t pt_q[] = {0}; +static uint32_t pt_dropped[] = {0}; +static uint32_t pt_enqueued[] = {0}; + +static struct test_rte_red_config pt_tconfig = { + .rconfig = pt_wrconfig, + .num_cfg = DIM(pt_wrconfig), + .wq_log2 = pt_wq_log2, + .min_th = 32, + .max_th = 128, + .maxp_inv = pt_maxp_inv, +}; + +static struct test_queue pt_tqueue = { + .rdata = pt_rtdata, + .num_queues = DIM(pt_rtdata), + .qconfig = pt_qconfig, + .q = pt_q, + .q_ramp_up = 1000000, + .avg_ramp_up = 1000000, + .avg_tolerance = 5, /* 10 percent */ + .drop_tolerance = 50, /* 50 percent */ +}; + +/** + * enqueue/dequeue packets + */ +static void enqueue_dequeue_perf(struct rte_red_config *red_cfg, + struct rte_red *red, + uint32_t *q, + uint32_t num_ops, + uint32_t *enqueued, + uint32_t *dropped, + struct rdtsc_prof *prof) +{ + uint32_t i = 0; + + for (i = 0; i < num_ops; i++) { + uint64_t ts = 0; + int ret = 0; + /** + * enqueue + */ + ts = get_port_ts(); + rdtsc_prof_start(prof); + ret = rte_red_enqueue(red_cfg, red, *q, ts ); + rdtsc_prof_end(prof); + if (ret == 0) + (*enqueued)++; + else + (*dropped)++; + } +} + +/** + * Setup test structures for tests P1, P2, P3 + * performance tests 1, 2 and 3 + */ +static uint32_t pt1_tlevel[] = {16}; +static uint32_t pt2_tlevel[] = {80}; +static uint32_t pt3_tlevel[] = {144}; + +static struct test_var perf1_tvar = { + .wait_usec = 0, + .num_iterations = 15, + .num_ops = 50000000, + .clk_freq = 0, + .dropped = pt_dropped, + .enqueued = pt_enqueued, + .sleep_sec = 0 +}; + +static struct test_config perf1_test1_config = { + .ifname = "performance test 1 interface", + .msg = "performance test 1 : use one RED 
configuration,\n" + " set actual and average queue sizes to level below min threshold,\n" + " measure enqueue performance\n\n", + .tconfig = &pt_tconfig, + .tqueue = &pt_tqueue, + .tvar = &perf1_tvar, + .tlevel = pt1_tlevel, +}; + +static struct test_config perf1_test2_config = { + .ifname = "performance test 2 interface", + .msg = "performance test 2 : use one RED configuration,\n" + " set actual and average queue sizes to level in between min and max thresholds,\n" + " measure enqueue performance\n\n", + .tconfig = &pt_tconfig, + .tqueue = &pt_tqueue, + .tvar = &perf1_tvar, + .tlevel = pt2_tlevel, +}; + +static struct test_config perf1_test3_config = { + .ifname = "performance test 3 interface", + .msg = "performance test 3 : use one RED configuration,\n" + " set actual and average queue sizes to level above max threshold,\n" + " measure enqueue performance\n\n", + .tconfig = &pt_tconfig, + .tqueue = &pt_tqueue, + .tvar = &perf1_tvar, + .tlevel = pt3_tlevel, +}; + +/** + * Performance test function to measure enqueue performance. + * This runs performance tests 1, 2 and 3 + */ +static enum test_result perf1_test(struct test_config *tcfg) +{ + enum test_result result = PASS; + struct rdtsc_prof prof = {0, 0, 0, 0, 0.0, NULL}; + uint32_t total = 0; + + printf("%s", tcfg->msg); + + rdtsc_prof_init(&prof, "enqueue"); + + if (test_rte_red_init(tcfg) != PASS) { + result = FAIL; + goto out; + } + + /** + * set average queue size to target level + */ + *tcfg->tqueue->q = *tcfg->tlevel; + + /** + * initialize the rte_red run time data structure + */ + rte_red_rt_data_init(tcfg->tqueue->rdata); + + /** + * set the queue average + */ + rte_red_set_avg_int(tcfg->tconfig->rconfig, tcfg->tqueue->rdata, *tcfg->tlevel); + if (rte_red_get_avg_int(tcfg->tconfig->rconfig, tcfg->tqueue->rdata) + != *tcfg->tlevel) { + result = FAIL; + goto out; + } + + enqueue_dequeue_perf(tcfg->tconfig->rconfig, + tcfg->tqueue->rdata, + tcfg->tqueue->q, + tcfg->tvar->num_ops, + tcfg->tvar->enqueued, + tcfg->tvar->dropped, + &prof); + + total = *tcfg->tvar->enqueued + *tcfg->tvar->dropped; + + printf("\ntotal: %u, enqueued: %u (%.2lf%%), dropped: %u (%.2lf%%)\n", total, + *tcfg->tvar->enqueued, ((double)(*tcfg->tvar->enqueued) / (double)total) * 100.0, + *tcfg->tvar->dropped, ((double)(*tcfg->tvar->dropped) / (double)total) * 100.0); + + rdtsc_prof_print(&prof); +out: + return (result); +} + +/** + * Setup test structures for tests P4, P5, P6 + * performance tests 4, 5 and 6 + */ +static uint32_t pt4_tlevel[] = {16}; +static uint32_t pt5_tlevel[] = {80}; +static uint32_t pt6_tlevel[] = {144}; + +static struct test_var perf2_tvar = { + .wait_usec = 500, + .num_iterations = 10000, + .num_ops = 10000, + .dropped = pt_dropped, + .enqueued = pt_enqueued, + .sleep_sec = 0 +}; + +static struct test_config perf2_test4_config = { + .ifname = "performance test 4 interface", + .msg = "performance test 4 : use one RED configuration,\n" + " set actual and average queue sizes to level below min threshold,\n" + " dequeue all packets until queue is empty,\n" + " measure enqueue performance when queue is empty\n\n", + .htxt = "iteration " + "q avg before " + "q avg after " + "expected " + "difference % " + "tolerance % " + "result ""\n", + .tconfig = &pt_tconfig, + .tqueue = &pt_tqueue, + .tvar = &perf2_tvar, + .tlevel = pt4_tlevel, +}; + +static struct test_config perf2_test5_config = { + .ifname = "performance test 5 interface", + .msg = "performance test 5 : use one RED configuration,\n" + " set actual and average queue sizes to level in 
between min and max thresholds,\n"
+	"		set actual and average queue sizes to level in between min and max thresholds,\n"
+	"		dequeue all packets until queue is empty,\n"
+	"		measure enqueue performance when queue is empty\n\n",
+	.htxt = "iteration      "
+	"q avg before   "
+	"q avg after    "
+	"expected       "
+	"difference %   "
+	"tolerance %    "
+	"result         ""\n",
+	.tconfig = &pt_tconfig,
+	.tqueue = &pt_tqueue,
+	.tvar = &perf2_tvar,
+	.tlevel = pt5_tlevel,
+};
+
+static struct test_config perf2_test6_config = {
+	.ifname = "performance test 6 interface",
+	.msg = "performance test 6 : use one RED configuration,\n"
+	"		set actual and average queue sizes to level above max threshold,\n"
+	"		dequeue all packets until queue is empty,\n"
+	"		measure enqueue performance when queue is empty\n\n",
+	.htxt = "iteration      "
+	"q avg before   "
+	"q avg after    "
+	"expected       "
+	"difference %   "
+	"tolerance %    "
+	"result         ""\n",
+	.tconfig = &pt_tconfig,
+	.tqueue = &pt_tqueue,
+	.tvar = &perf2_tvar,
+	.tlevel = pt6_tlevel,
+};
+
+/**
+ * Performance test function to measure enqueue performance when the
+ * queue is empty. This runs performance tests 4, 5 and 6
+ */
+static enum test_result perf2_test(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	struct rdtsc_prof prof = {0, 0, 0, 0, 0.0, NULL};
+	uint32_t total = 0;
+	uint32_t i = 0;
+
+	printf("%s", tcfg->msg);
+
+	rdtsc_prof_init(&prof, "enqueue");
+
+	if (test_rte_red_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	printf("%s", tcfg->htxt);
+
+	for (i = 0; i < tcfg->tvar->num_iterations; i++) {
+		uint32_t count = 0;
+		uint64_t ts = 0;
+		double avg_before = 0;
+		int ret = 0;
+
+		/**
+		 * set average queue size to target level
+		 */
+		*tcfg->tqueue->q = *tcfg->tlevel;
+		count = (*tcfg->tqueue->rdata).count;
+
+		/**
+		 * initialize the rte_red run time data structure
+		 */
+		rte_red_rt_data_init(tcfg->tqueue->rdata);
+		(*tcfg->tqueue->rdata).count = count;
+
+		/**
+		 * set the queue average
+		 */
+		rte_red_set_avg_int(tcfg->tconfig->rconfig, tcfg->tqueue->rdata, *tcfg->tlevel);
+		avg_before = rte_red_get_avg_float(tcfg->tconfig->rconfig, tcfg->tqueue->rdata);
+		if ((avg_before < *tcfg->tlevel) || (avg_before > *tcfg->tlevel)) {
+			result = FAIL;
+			goto out;
+		}
+
+		/**
+		 * empty the queue
+		 */
+		*tcfg->tqueue->q = 0;
+		rte_red_mark_queue_empty(tcfg->tqueue->rdata, get_port_ts());
+
+		/**
+		 * wait for specified period of time
+		 */
+		rte_delay_us(tcfg->tvar->wait_usec);
+
+		/**
+		 * measure performance of enqueue operation while queue is empty
+		 */
+		ts = get_port_ts();
+		rdtsc_prof_start(&prof);
+		ret = rte_red_enqueue(tcfg->tconfig->rconfig, tcfg->tqueue->rdata,
+				*tcfg->tqueue->q, ts);
+		rdtsc_prof_end(&prof);
+
+		/**
+		 * gather enqueued/dropped statistics
+		 */
+		if (ret == 0)
+			(*tcfg->tvar->enqueued)++;
+		else
+			(*tcfg->tvar->dropped)++;
+
+		/**
+		 * on first and last iteration, confirm that
+		 * average queue size was computed correctly
+		 */
+		if ((i == 0) || (i == tcfg->tvar->num_iterations - 1)) {
+			double avg_after = 0;
+			double exp_avg = 0;
+			double diff = 0.0;
+			int ok = 0;
+
+			avg_after = rte_red_get_avg_float(tcfg->tconfig->rconfig, tcfg->tqueue->rdata);
+			exp_avg = calc_exp_avg_on_empty(avg_before,
+					(1 << *tcfg->tconfig->wq_log2),
+					tcfg->tvar->wait_usec);
+			if (check_avg(&diff, avg_after, exp_avg, (double)tcfg->tqueue->avg_tolerance))
+				ok = 1;
+			printf("%-15u%-15.4lf%-15.4lf%-15.4lf%-15.4lf%-15.4lf%-15s\n",
+				i, avg_before, avg_after, exp_avg, diff,
+				(double)tcfg->tqueue->avg_tolerance, ok ? "pass" : "fail");
+			if (!ok) {
+				result = FAIL;
+				goto out;
+			}
+		}
+	}
+	total = *tcfg->tvar->enqueued + *tcfg->tvar->dropped;
+	printf("\ntotal: %u, enqueued: %u (%.2lf%%), dropped: %u (%.2lf%%)\n", total,
+		*tcfg->tvar->enqueued, ((double)(*tcfg->tvar->enqueued) / (double)total) * 100.0,
+		*tcfg->tvar->dropped, ((double)(*tcfg->tvar->dropped) / (double)total) * 100.0);
+
+	rdtsc_prof_print(&prof);
+out:
+	return (result);
+}
+
+/**
+ * setup default values for overflow test structures
+ */
+static uint32_t avg_max = 0;
+static uint32_t avg_max_bits = 0;
+
+static struct rte_red_config ovfl_wrconfig[1];
+static struct rte_red ovfl_rtdata[1];
+static uint8_t ovfl_maxp_inv[] = {10};
+static uint32_t ovfl_qconfig[] = {0, 0, 1, 1};
+static uint32_t ovfl_q[] = {0};
+static uint32_t ovfl_dropped[] = {0};
+static uint32_t ovfl_enqueued[] = {0};
+static uint32_t ovfl_tlevel[] = {1023};
+static uint8_t ovfl_wq_log2[] = {12};
+
+static struct test_rte_red_config ovfl_tconfig = {
+	.rconfig = ovfl_wrconfig,
+	.num_cfg = DIM(ovfl_wrconfig),
+	.wq_log2 = ovfl_wq_log2,
+	.min_th = 32,
+	.max_th = 1023,
+	.maxp_inv = ovfl_maxp_inv,
+};
+
+static struct test_queue ovfl_tqueue = {
+	.rdata = ovfl_rtdata,
+	.num_queues = DIM(ovfl_rtdata),
+	.qconfig = ovfl_qconfig,
+	.q = ovfl_q,
+	.q_ramp_up = 1000000,
+	.avg_ramp_up = 1000000,
+	.avg_tolerance = 5, /* 5 percent */
+	.drop_tolerance = 50, /* 50 percent */
+};
+
+static struct test_var ovfl_tvar = {
+	.wait_usec = 10000,
+	.num_iterations = 1,
+	.num_ops = 10000,
+	.clk_freq = 0,
+	.dropped = ovfl_dropped,
+	.enqueued = ovfl_enqueued,
+	.sleep_sec = 0
+};
+
+static void ovfl_check_avg(uint32_t avg)
+{
+	if (avg > avg_max) {
+		double avg_log = 0;
+		uint32_t bits = 0;
+		avg_max = avg;
+		avg_log = log(((double)avg_max));
+		avg_log = avg_log / log(2.0);
+		bits = (uint32_t)ceil(avg_log);
+		if (bits > avg_max_bits)
+			avg_max_bits = bits;
+	}
+}
+
+static struct test_config ovfl_test1_config = {
+	.ifname = "queue average overflow test interface",
+	.msg = "overflow test 1 : use one RED configuration,\n"
+	"		increase average queue size to target level,\n"
+	"		check maximum number of bits required to represent avg_s\n\n",
+	.htxt = "avg queue size  "
+	"wq_log2  "
+	"fraction bits  "
+	"max queue avg "
+	"num bits  "
+	"enqueued  "
+	"dropped   "
+	"drop prob %  "
+	"drop rate %  "
+	"\n",
+	.tconfig = &ovfl_tconfig,
+	.tqueue = &ovfl_tqueue,
+	.tvar = &ovfl_tvar,
+	.tlevel = ovfl_tlevel,
+};
+
+static enum test_result ovfl_test1(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	uint32_t avg = 0;
+	uint32_t i = 0;
+	double drop_rate = 0.0;
+	double drop_prob = 0.0;
+	double diff = 0.0;
+	int ret = 0;
+
+	printf("%s", tcfg->msg);
+
+	if (test_rte_red_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	/**
+	 * reset rte_red run-time data
+	 */
+	rte_red_rt_data_init(tcfg->tqueue->rdata);
+
+	/**
+	 * increase actual queue size
+	 */
+	for (i = 0; i < tcfg->tqueue->q_ramp_up; i++) {
+		ret = rte_red_enqueue(tcfg->tconfig->rconfig, tcfg->tqueue->rdata,
+				*tcfg->tqueue->q, get_port_ts());
+
+		if (ret == 0) {
+			if (++(*tcfg->tqueue->q) >= *tcfg->tlevel)
+				break;
+		}
+	}
+
+	/**
+	 * enqueue
+	 */
+	for (i = 0; i < tcfg->tqueue->avg_ramp_up; i++) {
+		ret = rte_red_enqueue(tcfg->tconfig->rconfig, tcfg->tqueue->rdata,
+				*tcfg->tqueue->q, get_port_ts());
+		ovfl_check_avg((*tcfg->tqueue->rdata).avg);
+		avg = rte_red_get_avg_int(tcfg->tconfig->rconfig, tcfg->tqueue->rdata);
+		if (avg == *tcfg->tlevel) {
+			if (ret == 0)
+				(*tcfg->tvar->enqueued)++;
+			else
+				(*tcfg->tvar->dropped)++;
+		}
+	}
+
+	/**
+	 * check if target average queue size has been reached
+	 */
+	avg = rte_red_get_avg_int(tcfg->tconfig->rconfig, tcfg->tqueue->rdata);
+	if (avg != *tcfg->tlevel) {
+		result = FAIL;
+		goto out;
+	}
+
+	/**
+	 * check drop rate against drop probability
+	 */
+	drop_rate = calc_drop_rate(*tcfg->tvar->enqueued, *tcfg->tvar->dropped);
+	drop_prob = calc_drop_prob(tcfg->tconfig->min_th,
+			tcfg->tconfig->max_th,
+			*tcfg->tconfig->maxp_inv,
+			*tcfg->tlevel);
+	if (!check_drop_rate(&diff, drop_rate, drop_prob, (double)tcfg->tqueue->drop_tolerance))
+		result = FAIL;
+
+	printf("%s", tcfg->htxt);
+
+	printf("%-16u%-9u%-15u0x%08x %-10u%-10u%-10u%-13.2lf%-13.2lf\n",
+		avg, *tcfg->tconfig->wq_log2, RTE_RED_SCALING,
+		avg_max, avg_max_bits,
+		*tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+		drop_prob * 100.0, drop_rate * 100.0);
+out:
+	return (result);
+}
+
+/**
+ * define the functional and performance tests to be executed
+ */
+struct tests func_tests[] = {
+	{ &func_test1_config, func_test1 },
+	{ &func_test2_config, func_test2 },
+	{ &func_test3_config, func_test3 },
+	{ &func_test4_config, func_test4 },
+	{ &func_test5_config, func_test5 },
+	{ &func_test6_config, func_test6 },
+	{ &ovfl_test1_config, ovfl_test1 },
+};
+
+struct tests perf_tests[] = {
+	{ &perf1_test1_config, perf1_test },
+	{ &perf1_test2_config, perf1_test },
+	{ &perf1_test3_config, perf1_test },
+	{ &perf2_test4_config, perf2_test },
+	{ &perf2_test5_config, perf2_test },
+	{ &perf2_test6_config, perf2_test },
+};
+
+/**
+ * function to execute the required tests
+ */
+static void run_tests(struct tests *test_type, uint32_t test_count, uint32_t *num_tests, uint32_t *num_pass)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+
+	for (i = 0; i < test_count; i++) {
+		printf("\n--------------------------------------------------------------------------------\n");
+		result = test_type[i].testfn(test_type[i].testcfg);
+		(*num_tests)++;
+		if (result == PASS) {
+			(*num_pass)++;
+			printf("--------------------------------------------------------------------------<pass>\n");
+		} else {
+			printf("--------------------------------------------------------------------------<fail>\n");
+		}
+	}
+	return;
+}
+
+/**
+ * check if functions accept invalid parameters
+ *
+ * First, all functions will be called without initialized RED
+ * Then, all of them will be called with NULL/invalid parameters
+ *
+ * Some functions are not tested as they are performance-critical and thus
+ * don't do any parameter checking.
+ */
+static int
+test_invalid_parameters(void)
+{
+	struct rte_red_config config;
+
+	if (rte_red_rt_data_init(NULL) == 0) {
+		printf("rte_red_rt_data_init should have failed!\n");
+		return -1;
+	}
+
+	if (rte_red_config_init(NULL, 0, 0, 0, 0) == 0) {
+		printf("rte_red_config_init should have failed!\n");
+		return -1;
+	}
+
+	if (rte_red_rt_data_init(NULL) == 0) {
+		printf("rte_red_rt_data_init should have failed!\n");
+		return -1;
+	}
+
+	/* NULL config */
+	if (rte_red_config_init(NULL, 0, 0, 0, 0) == 0) {
+		printf("%i: rte_red_config_init should have failed!\n", __LINE__);
+		return -1;
+	}
+	/* min_threshold == max_threshold */
+	if (rte_red_config_init(&config, 0, 1, 1, 0) == 0) {
+		printf("%i: rte_red_config_init should have failed!\n", __LINE__);
+		return -1;
+	}
+	/* min_threshold > max_threshold */
+	if (rte_red_config_init(&config, 0, 2, 1, 0) == 0) {
+		printf("%i: rte_red_config_init should have failed!\n", __LINE__);
+		return -1;
+	}
+	/* wq_log2 > RTE_RED_WQ_LOG2_MAX */
+	if (rte_red_config_init(&config,
+			RTE_RED_WQ_LOG2_MAX + 1, 1, 2, 0) == 0) {
+		printf("%i: rte_red_config_init should have failed!\n", __LINE__);
+		return -1;
+	}
+	/* wq_log2 < RTE_RED_WQ_LOG2_MIN */
+	if (rte_red_config_init(&config,
+			RTE_RED_WQ_LOG2_MIN - 1, 1, 2, 0) == 0) {
+		printf("%i: rte_red_config_init should have failed!\n", __LINE__);
+		return -1;
+	}
+	/* maxp_inv > RTE_RED_MAXP_INV_MAX */
+	if (rte_red_config_init(&config,
+			RTE_RED_WQ_LOG2_MIN, 1, 2, RTE_RED_MAXP_INV_MAX + 1) == 0) {
+		printf("%i: rte_red_config_init should have failed!\n", __LINE__);
+		return -1;
+	}
+	/* maxp_inv < RTE_RED_MAXP_INV_MIN */
+	if (rte_red_config_init(&config,
+			RTE_RED_WQ_LOG2_MIN, 1, 2, RTE_RED_MAXP_INV_MIN - 1) == 0) {
+		printf("%i: rte_red_config_init should have failed!\n", __LINE__);
+		return -1;
+	}
+
+	return 0;
+}
+
+int test_red(void)
+{
+	uint32_t num_tests = 0;
+	uint32_t num_pass = 0;
+	int ret = 0;
+
+	if (test_invalid_parameters() < 0)
+		return -1;
+
+	run_tests(func_tests, DIM(func_tests), &num_tests, &num_pass);
+	run_tests(perf_tests, DIM(perf_tests), &num_tests, &num_pass);
+
+	if (num_pass == num_tests) {
+		printf("[total: %u, pass: %u]\n", num_tests, num_pass);
+		ret = 0;
+	} else {
+		printf("[total: %u, pass: %u, fail: %u]\n", num_tests, num_pass, num_tests - num_pass);
+		ret = -1;
+	}
+	return (ret);
+}
+
+#else
+
+int
+test_red(void)
+{
+	printf("The SCHED library is not included in this build\n");
+	return 0;
+}
+
+#endif
diff --git a/app/test/test_sched.c b/app/test/test_sched.c
new file mode 100755
index 0000000000..a0efa52491
--- /dev/null
+++ b/app/test/test_sched.c
@@ -0,0 +1,244 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ *   * Neither the name of Intel Corporation nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <unistd.h>
+
+#include <rte_cycles.h>
+
+#include "test.h"
+
+#if defined(RTE_LIBRTE_SCHED) && defined(RTE_ARCH_X86_64)
+
+#include <rte_mempool.h>
+#include <rte_mbuf.h>
+#include <rte_ether.h>
+#include <rte_ip.h>
+#include <rte_sched.h>
+
+
+#define VERIFY(exp,fmt,args...) \
+	if (!(exp)) { \
+		printf(fmt, ##args); \
+		return -1; \
+	}
+
+
+#define SUBPORT 0
+#define PIPE 1
+#define TC 2
+#define QUEUE 3
+
+static struct rte_sched_subport_params subport_param[] = {
+	{
+		.tb_rate = 1250000000,
+		.tb_size = 1000000,
+
+		.tc_rate = {1250000000, 1250000000, 1250000000, 1250000000},
+		.tc_period = 10,
+	},
+};
+
+static struct rte_sched_pipe_params pipe_profile[] = {
+	{ /* Profile #0 */
+		.tb_rate = 305175,
+		.tb_size = 1000000,
+
+		.tc_rate = {305175, 305175, 305175, 305175},
+		.tc_period = 40,
+
+		.wrr_weights = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+	},
+};
+
+static struct rte_sched_port_params port_param = {
+	.name = "port_0",
+	.socket = 0, /* computed */
+	.rate = 0, /* computed */
+	.frame_overhead = RTE_SCHED_FRAME_OVERHEAD_DEFAULT,
+	.n_subports_per_port = 1,
+	.n_pipes_per_subport = 4096,
+	.qsize = {64, 64, 64, 64},
+	.pipe_profiles = pipe_profile,
+	.n_pipe_profiles = 1,
+};
+
+#define NB_MBUF 32
+#define MAX_PACKET_SZ 2048
+#define MBUF_SZ (MAX_PACKET_SZ + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)
+#define PKT_BURST_SZ 32
+#define MEMPOOL_CACHE_SZ PKT_BURST_SZ
+#define SOCKET 0
+
+
+static struct rte_mempool *
+create_mempool(void)
+{
+	struct rte_mempool *mp;
+
+	mp = rte_mempool_lookup("test_sched");
+	if (!mp)
+		mp = rte_mempool_create("test_sched",
+				NB_MBUF,
+				MBUF_SZ,
+				MEMPOOL_CACHE_SZ,
+				sizeof(struct rte_pktmbuf_pool_private),
+				rte_pktmbuf_pool_init,
+				NULL,
+				rte_pktmbuf_init,
+				NULL,
+				SOCKET,
+				0);
+
+	return mp;
+}
+
+static void
+prepare_pkt(struct rte_mbuf *mbuf)
+{
+	struct ether_hdr *eth_hdr;
+	struct vlan_hdr *vlan1, *vlan2;
+	struct ipv4_hdr *ip_hdr;
+
+	/* Simulate a classifier */
+	eth_hdr = rte_pktmbuf_mtod(mbuf, struct ether_hdr *);
+	vlan1 = (struct vlan_hdr *)(&eth_hdr->ether_type);
+	vlan2 = (struct vlan_hdr *)((uintptr_t)&eth_hdr->ether_type + sizeof(struct vlan_hdr));
+	eth_hdr = (struct ether_hdr *)((uintptr_t)&eth_hdr->ether_type + 2 * sizeof(struct vlan_hdr));
+	ip_hdr = (struct ipv4_hdr *)((uintptr_t)eth_hdr + sizeof(eth_hdr->ether_type));
+
+	vlan1->vlan_tci = rte_cpu_to_be_16(SUBPORT);
+	vlan2->vlan_tci = rte_cpu_to_be_16(PIPE);
+	eth_hdr->ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
+	ip_hdr->dst_addr = IPv4(0,0,TC,QUEUE);
+
+
+	rte_sched_port_pkt_write(mbuf, SUBPORT, PIPE, TC, QUEUE, e_RTE_METER_YELLOW);
+
+	/* 64 byte packet */
+	mbuf->pkt.pkt_len = 60;
+	mbuf->pkt.data_len = 60;
+}
+
+
+/**
+ * test main entrance for library sched
+ */
+int
+test_sched(void)
+{
struct rte_mempool *mp = NULL; + struct rte_sched_port *port = NULL; + uint32_t pipe; + struct rte_mbuf *in_mbufs[10]; + struct rte_mbuf *out_mbufs[10]; + int i; + + int err; + + mp = create_mempool(); + + port_param.socket = 0; + port_param.rate = (uint64_t) 10000 * 1000 * 1000 / 8; + port_param.name = "port_0"; + + port = rte_sched_port_config(&port_param); + VERIFY(port != NULL, "Error config sched port\n"); + + + err = rte_sched_subport_config(port, SUBPORT, subport_param); + VERIFY(err == 0, "Error config sched, err=%d\n", err); + + for (pipe = 0; pipe < port_param.n_pipes_per_subport; pipe ++) { + err = rte_sched_pipe_config(port, SUBPORT, pipe, 0); + VERIFY(err == 0, "Error config sched pipe %u, err=%d\n", pipe, err); + } + + for (i = 0; i < 10; i++) { + in_mbufs[i] = rte_pktmbuf_alloc(mp); + prepare_pkt(in_mbufs[i]); + } + + + err = rte_sched_port_enqueue(port, in_mbufs, 10); + VERIFY(err == 10, "Wrong enqueue, err=%d\n", err); + + err = rte_sched_port_dequeue(port, out_mbufs, 10); + VERIFY(err == 10, "Wrong dequeue, err=%d\n", err); + + for (i = 0; i < 10; i++) { + enum rte_meter_color color; + uint32_t subport, traffic_class, queue; + + color = rte_sched_port_pkt_read_color(out_mbufs[i]); + VERIFY(color == e_RTE_METER_YELLOW, "Wrong color\n"); + + rte_sched_port_pkt_read_tree_path(out_mbufs[i], + &subport, &pipe, &traffic_class, &queue); + + VERIFY(subport == SUBPORT, "Wrong subport\n"); + VERIFY(pipe == PIPE, "Wrong pipe\n"); + VERIFY(traffic_class == TC, "Wrong traffic_class\n"); + VERIFY(queue == QUEUE, "Wrong queue\n"); + + } + + + struct rte_sched_subport_stats subport_stats; + uint32_t tc_ov; + rte_sched_subport_read_stats(port, SUBPORT, &subport_stats, &tc_ov); + //VERIFY(subport_stats.n_pkts_tc[TC-1] == 10, "Wrong subport stats\n"); + + struct rte_sched_queue_stats queue_stats; + uint16_t qlen; + rte_sched_queue_read_stats(port, QUEUE, &queue_stats, &qlen); + //VERIFY(queue_stats.n_pkts == 10, "Wrong queue stats\n"); + + rte_sched_port_free(port); + + return 0; +} + +#else /* RTE_LIBRTE_SCHED */ + +int +test_sched(void) +{ + printf("The Scheduler library is not included in this build\n"); + return 0; +} +#endif /* RTE_LIBRTE_SCHED */ diff --git a/config/defconfig_i686-default-linuxapp-gcc b/config/defconfig_i686-default-linuxapp-gcc index 5960e85ff5..a63d37a1d0 100644 --- a/config/defconfig_i686-default-linuxapp-gcc +++ b/config/defconfig_i686-default-linuxapp-gcc @@ -233,6 +233,16 @@ CONFIG_RTE_LIBRTE_NET=y # CONFIG_RTE_LIBRTE_METER=y +# +# Compile librte_sched +# +CONFIG_RTE_LIBRTE_SCHED=y +CONFIG_RTE_SCHED_RED=n +CONFIG_RTE_SCHED_COLLECT_STATS=n +CONFIG_RTE_SCHED_SUBPORT_TC_OV=n +CONFIG_RTE_SCHED_PORT_N_GRINDERS=8 +CONFIG_RTE_BITMAP_ARRAY1_SIZE=16 + # # Compile librte_kni # diff --git a/config/defconfig_i686-default-linuxapp-icc b/config/defconfig_i686-default-linuxapp-icc index 6c6a59d9de..cf86ba5b08 100644 --- a/config/defconfig_i686-default-linuxapp-icc +++ b/config/defconfig_i686-default-linuxapp-icc @@ -234,6 +234,16 @@ CONFIG_RTE_LIBRTE_NET=y # CONFIG_RTE_LIBRTE_METER=y +# +# Compile librte_sched +# +CONFIG_RTE_LIBRTE_SCHED=y +CONFIG_RTE_SCHED_RED=n +CONFIG_RTE_SCHED_COLLECT_STATS=n +CONFIG_RTE_SCHED_SUBPORT_TC_OV=n +CONFIG_RTE_SCHED_PORT_N_GRINDERS=8 +CONFIG_RTE_BITMAP_ARRAY1_SIZE=16 + # # Compile librte_kni # diff --git a/config/defconfig_x86_64-default-linuxapp-gcc b/config/defconfig_x86_64-default-linuxapp-gcc index 1dcc8c6486..b5d3362afc 100644 --- a/config/defconfig_x86_64-default-linuxapp-gcc +++ b/config/defconfig_x86_64-default-linuxapp-gcc @@ -235,6 
+235,17 @@ CONFIG_RTE_LIBRTE_NET=y CONFIG_RTE_LIBRTE_METER=y # +# Compile librte_sched +# +CONFIG_RTE_LIBRTE_SCHED=y +CONFIG_RTE_SCHED_RED=n +CONFIG_RTE_SCHED_COLLECT_STATS=n +CONFIG_RTE_SCHED_SUBPORT_TC_OV=n +CONFIG_RTE_SCHED_PORT_N_GRINDERS=8 +CONFIG_RTE_BITMAP_ARRAY1_SIZE=16 + +# +# Compile the test application # Compile librte_kni # CONFIG_RTE_LIBRTE_KNI=y diff --git a/config/defconfig_x86_64-default-linuxapp-icc b/config/defconfig_x86_64-default-linuxapp-icc index 7053a1067e..60f10afd16 100644 --- a/config/defconfig_x86_64-default-linuxapp-icc +++ b/config/defconfig_x86_64-default-linuxapp-icc @@ -234,6 +234,16 @@ CONFIG_RTE_LIBRTE_NET=y # CONFIG_RTE_LIBRTE_METER=y +# +# Compile librte_sched +# +CONFIG_RTE_LIBRTE_SCHED=y +CONFIG_RTE_SCHED_RED=n +CONFIG_RTE_SCHED_COLLECT_STATS=n +CONFIG_RTE_SCHED_SUBPORT_TC_OV=n +CONFIG_RTE_SCHED_PORT_N_GRINDERS=8 +CONFIG_RTE_BITMAP_ARRAY1_SIZE=16 + # # Compile librte_kni # diff --git a/examples/qos_sched/Makefile b/examples/qos_sched/Makefile new file mode 100755 index 0000000000..08f4d191c0 --- /dev/null +++ b/examples/qos_sched/Makefile @@ -0,0 +1,58 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2013 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
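Each defconfig hunk above repeats the same librte_sched switches; by the usual DPDK convention (assumed here, this patch itself does not show the mapping) a CONFIG_FOO=y line surfaces in C as a plain FOO macro, so code can probe the build as in this self-contained sketch:

	#include <stdio.h>

	int
	main(void)
	{
	#ifdef RTE_SCHED_RED
		printf("WRED is compiled into the scheduler enqueue path\n");
	#else
		printf("tail drop only (CONFIG_RTE_SCHED_RED=n)\n");
	#endif
	#ifdef RTE_SCHED_COLLECT_STATS
		printf("queue/subport statistics collection is enabled\n");
	#endif
		return 0;
	}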
+# + +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overriden by command line or environment +RTE_TARGET ?= x86_64-default-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +ifneq ($(CONFIG_RTE_EXEC_ENV),"linuxapp") +$(error This application can only operate in a linuxapp environment, \ +please change the definition of the RTE_TARGET environment variable) +endif + +# binary name +APP = qos_sched + +# all source are stored in SRCS-y +SRCS-y := main.c args.c init.c app_thread.c cfg_file.c + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) + +LDLIBS += -lrte_sched + +include $(RTE_SDK)/mk/rte.extapp.mk diff --git a/examples/qos_sched/app_thread.c b/examples/qos_sched/app_thread.c new file mode 100755 index 0000000000..afce5efe24 --- /dev/null +++ b/examples/qos_sched/app_thread.c @@ -0,0 +1,302 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2013 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "main.h" + +/* + * QoS parameters are encoded as follows: + * Outer VLAN ID defines subport + * Inner VLAN ID defines pipe + * Destination IP 0.0.XXX.0 defines traffic class + * Destination IP host (0.0.0.XXX) defines queue + * Values below define offset to each field from start of frame + */ +#define SUBPORT_OFFSET 7 +#define PIPE_OFFSET 9 +#define TC_OFFSET 20 +#define QUEUE_OFFSET 20 +#define COLOR_OFFSET 19 + +static inline int +get_pkt_sched(struct rte_mbuf *m, uint32_t *subport, uint32_t *pipe, + uint32_t *traffic_class, uint32_t *queue, uint32_t *color) +{ + uint16_t *pdata = rte_pktmbuf_mtod(m, uint16_t *); + + *subport = (rte_be_to_cpu_16(pdata[SUBPORT_OFFSET]) & 0x0FFF) & + (port_params.n_subports_per_port - 1); /* Outer VLAN ID*/ + *pipe = (rte_be_to_cpu_16(pdata[PIPE_OFFSET]) & 0x0FFF) & + (port_params.n_pipes_per_subport - 1); /* Inner VLAN ID */ + *traffic_class = (pdata[QUEUE_OFFSET] & 0x0F) & + (RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE - 1); /* Destination IP */ + *queue = ((pdata[QUEUE_OFFSET] >> 8) & 0x0F) & + (RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS - 1) ; /* Destination IP */ + *color = pdata[COLOR_OFFSET] & 0x03; /* Destination IP */ + + return 0; +} + +void +app_rx_thread(struct thread_conf **confs) +{ + uint32_t i, nb_rx; + struct rte_mbuf *rx_mbufs[burst_conf.rx_burst] __rte_cache_aligned; + struct thread_conf *conf; + int conf_idx = 0; + + uint32_t subport; + uint32_t pipe; + uint32_t traffic_class; + uint32_t queue; + uint32_t color; + + while ((conf = confs[conf_idx])) { + nb_rx = rte_eth_rx_burst(conf->rx_port, conf->rx_queue, rx_mbufs, + burst_conf.rx_burst); + + if (likely(nb_rx != 0)) { + APP_STATS_ADD(conf->stat.nb_rx, nb_rx); + + for(i = 0; i < nb_rx; i++) { + get_pkt_sched(rx_mbufs[i], + &subport, &pipe, &traffic_class, &queue, &color); + rte_sched_port_pkt_write(rx_mbufs[i], subport, pipe, + traffic_class, queue, (enum rte_meter_color) color); + } + + if (unlikely(rte_ring_sp_enqueue_bulk(conf->rx_ring, + (void **)rx_mbufs, nb_rx) != 0)) { + for(i = 0; i < nb_rx; i++) { + rte_pktmbuf_free(rx_mbufs[i]); + + APP_STATS_ADD(conf->stat.nb_drop, 1); + } + } + } + conf_idx++; + if (confs[conf_idx] == NULL) + conf_idx = 0; + } +} + + + +/* Send the packet to an output interface + * For performance reason function returns number of packets dropped, not sent, + * so 0 means that all packets were sent successfully + */ + +static inline void +app_send_burst(struct thread_conf *qconf) +{ + struct rte_mbuf **mbufs; + uint32_t n, ret; + + mbufs = (struct rte_mbuf **)qconf->m_table; + n = qconf->n_mbufs; + + do { + ret = rte_eth_tx_burst(qconf->tx_port, qconf->tx_queue, mbufs, (uint16_t)n); + if (unlikely(ret < n)) { /* we cannot drop the packets, so re-send */ + /* update number of packets to be sent */ + n -= ret; + mbufs = (struct rte_mbuf **)&mbufs[ret]; + /* limit number of retries to avoid endless loop */ + /* reset retry counter if some packets were sent */ + if (likely(ret != 0)) { + continue; + } + } + } while (ret != n); +} + + +/* Send the packet to an output interface */ +static void +app_send_packets(struct thread_conf *qconf, struct rte_mbuf **mbufs, uint32_t nb_pkt) +{ + uint32_t i, len; + + len = qconf->n_mbufs; + for(i = 0; i < nb_pkt; i++) { + qconf->m_table[len] = mbufs[i]; + len++; + /* enough pkts to be sent */ + if (unlikely(len == burst_conf.tx_burst)) { + qconf->n_mbufs = len; + app_send_burst(qconf); + len = 0; + } + } + + 
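+	/* Anything short of a full burst stays buffered in m_table; it is
+	 * flushed either when the table fills up to tx_burst or when the
+	 * calling thread's drain timer (BURST_TX_DRAIN_US) expires. */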
qconf->n_mbufs = len; +} + +void +app_tx_thread(struct thread_conf **confs) +{ + struct rte_mbuf *mbufs[burst_conf.qos_dequeue]; + struct thread_conf *conf; + int conf_idx = 0; + int retval; + const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US; + + while ((conf = confs[conf_idx])) { + retval = rte_ring_sc_dequeue_bulk(conf->tx_ring, (void **)mbufs, + burst_conf.qos_dequeue); + if (likely(retval == 0)) { + app_send_packets(conf, mbufs, burst_conf.qos_dequeue); + + conf->counter = 0; /* reset empty read loop counter */ + } + + conf->counter++; + + /* drain ring and TX queues */ + if (unlikely(conf->counter > drain_tsc)) { + /* now check is there any packets left to be transmitted */ + if (conf->n_mbufs != 0) { + app_send_burst(conf); + + conf->n_mbufs = 0; + } + conf->counter = 0; + } + + conf_idx++; + if (confs[conf_idx] == NULL) + conf_idx = 0; + } +} + + +void +app_worker_thread(struct thread_conf **confs) +{ + struct rte_mbuf *mbufs[burst_conf.ring_burst]; + struct thread_conf *conf; + int conf_idx = 0; + + while ((conf = confs[conf_idx])) { + uint32_t nb_pkt; + int retval; + + /* Read packet from the ring */ + retval = rte_ring_sc_dequeue_bulk(conf->rx_ring, (void **)mbufs, + burst_conf.ring_burst); + if (likely(retval == 0)) { + int nb_sent = rte_sched_port_enqueue(conf->sched_port, mbufs, + burst_conf.ring_burst); + + APP_STATS_ADD(conf->stat.nb_drop, burst_conf.ring_burst - nb_sent); + APP_STATS_ADD(conf->stat.nb_rx, burst_conf.ring_burst); + } + + nb_pkt = rte_sched_port_dequeue(conf->sched_port, mbufs, + burst_conf.qos_dequeue); + if (likely(nb_pkt > 0)) + while (rte_ring_sp_enqueue_bulk(conf->tx_ring, (void **)mbufs, nb_pkt) != 0); + + conf_idx++; + if (confs[conf_idx] == NULL) + conf_idx = 0; + } +} + + +void +app_mixed_thread(struct thread_conf **confs) +{ + struct rte_mbuf *mbufs[burst_conf.ring_burst]; + struct thread_conf *conf; + int conf_idx = 0; + const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US; + + while ((conf = confs[conf_idx])) { + uint32_t nb_pkt; + int retval; + + /* Read packet from the ring */ + retval = rte_ring_sc_dequeue_bulk(conf->rx_ring, (void **)mbufs, + burst_conf.ring_burst); + if (likely(retval == 0)) { + int nb_sent = rte_sched_port_enqueue(conf->sched_port, mbufs, + burst_conf.ring_burst); + + APP_STATS_ADD(conf->stat.nb_drop, burst_conf.ring_burst - nb_sent); + APP_STATS_ADD(conf->stat.nb_rx, burst_conf.ring_burst); + } + + + nb_pkt = rte_sched_port_dequeue(conf->sched_port, mbufs, + burst_conf.qos_dequeue); + if (likely(nb_pkt > 0)) { + app_send_packets(conf, mbufs, nb_pkt); + + conf->counter = 0; /* reset empty read loop counter */ + } + + conf->counter++; + + /* drain ring and TX queues */ + if (unlikely(conf->counter > drain_tsc)) { + + /* now check is there any packets left to be transmitted */ + if (conf->n_mbufs != 0) { + app_send_burst(conf); + + conf->n_mbufs = 0; + } + conf->counter = 0; + } + + conf_idx++; + if (confs[conf_idx] == NULL) + conf_idx = 0; + } +} + + diff --git a/examples/qos_sched/args.c b/examples/qos_sched/args.c new file mode 100755 index 0000000000..c9cfdb24b3 --- /dev/null +++ b/examples/qos_sched/args.c @@ -0,0 +1,467 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2013 Intel Corporation. All rights reserved. + * All rights reserved. 
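The worker and mixed loops above lean on the all-or-nothing contract of the bulk ring calls: a bulk dequeue either delivers exactly the requested count and returns 0, or delivers nothing. A minimal sketch of that pattern under the same assumption, with hypothetical ring arguments and the burst capped by the local array:

	#include <rte_ring.h>

	#define BURST 32

	/* Move one full burst from rx to tx, or nothing at all. */
	static void
	move_burst(struct rte_ring *rx, struct rte_ring *tx)
	{
		void *objs[BURST];

		/* non-zero means fewer than BURST objects were available */
		if (rte_ring_sc_dequeue_bulk(rx, objs, BURST) != 0)
			return;

		/* spin until the TX ring accepts the burst, as the app does */
		while (rte_ring_sp_enqueue_bulk(tx, objs, BURST) != 0)
			;
	}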
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "main.h" + +#define APP_NAME "qos_sched" +#define MAX_OPT_VALUES 8 +#define SYS_CPU_DIR "/sys/devices/system/cpu/cpu%u/topology/" + +static uint32_t app_master_core = 1; +static uint32_t app_numa_mask; +static uint64_t app_used_core_mask = 0; +static uint64_t app_used_port_mask = 0; +static uint64_t app_used_rx_port_mask = 0; +static uint64_t app_used_tx_port_mask = 0; + + +static const char usage[] = + " \n" + " %s \n" + " \n" + "Application mandatory parameters: \n" + " --pfc \"RX PORT, TX PORT, RX LCORE, WT LCORE\" : Packet flow configuration \n" + " multiple pfc can be configured in command line \n" + " \n" + "Application optional parameters: \n" + " --mst I : master core index (default value is %u) \n" + " --rsz \"A, B, C\" : Ring sizes \n" + " A = Size (in number of buffer descriptors) of each of the NIC RX \n" + " rings read by the I/O RX lcores (default value is %u) \n" + " B = Size (in number of elements) of each of the SW rings used by the\n" + " I/O RX lcores to send packets to worker lcores (default value is\n" + " %u) \n" + " C = Size (in number of buffer descriptors) of each of the NIC TX \n" + " rings written by worker lcores (default value is %u) \n" + " --bsz \"A, B, C, D\": Burst sizes \n" + " A = I/O RX lcore read burst size from NIC RX (default value is %u) \n" + " B = I/O RX lcore write burst size to output SW rings, \n" + " Worker lcore read burst size from input SW rings, \n" + " QoS enqueue size (default value is %u) \n" + " C = QoS dequeue size (default value is %u) \n" + " D = Worker lcore write burst size to NIC TX (default value is %u) \n" + " --rth \"A, B, C\" : RX queue threshold parameters \n" + " A = RX prefetch threshold (default value is %u) \n" + " B = RX host threshold (default value is %u) \n" + " C = RX write-back threshold (default value is %u) \n" + " --tth \"A, B, C\" : TX queue threshold parameters \n" + " A = TX 
prefetch threshold (default value is %u) \n" + " B = TX host threshold (default value is %u) \n" + " C = TX write-back threshold (default value is %u) \n" + " --cfg FILE : profile configuration to load \n" +; + +/* display usage */ +static void +app_usage(const char *prgname) +{ + printf(usage, prgname, app_master_core, + APP_RX_DESC_DEFAULT, APP_RING_SIZE, APP_TX_DESC_DEFAULT, + MAX_PKT_RX_BURST, PKT_ENQUEUE, PKT_DEQUEUE, MAX_PKT_TX_BURST, + RX_PTHRESH, RX_HTHRESH, RX_WTHRESH, + TX_PTHRESH, TX_HTHRESH, TX_WTHRESH + ); +} + +static inline int str_is(const char *str, const char *is) +{ + return (strcmp(str, is) == 0); +} + +/* returns core mask used by DPDK */ +static uint64_t +app_eal_core_mask(void) +{ + uint32_t i; + uint64_t cm = 0; + struct rte_config *cfg = rte_eal_get_configuration(); + + for (i = 0; i < RTE_MAX_LCORE; i++) { + if (cfg->lcore_role[i] == ROLE_RTE) + cm |= (1ULL << i); + } + + cm |= (1ULL << cfg->master_lcore); + + return cm; +} + + +/* returns total number of cores presented in a system */ +static uint32_t +app_cpu_core_count(void) +{ + int i, len; + char path[PATH_MAX]; + uint32_t ncores = 0; + + for(i = 0; i < RTE_MAX_LCORE; i++) { + len = rte_snprintf(path, sizeof(path), SYS_CPU_DIR, i); + if (len <= 0 || (unsigned)len >= sizeof(path)) + continue; + + if (access(path, F_OK) == 0) + ncores++; + } + + return ncores; +} + +/* returns: + number of values parsed + -1 in case of error +*/ +static int +app_parse_opt_vals(const char *conf_str, char separator, uint32_t n_vals, uint32_t *opt_vals) +{ + char *string; + uint32_t i, n_tokens; + char *tokens[MAX_OPT_VALUES]; + + if (conf_str == NULL || opt_vals == NULL || n_vals == 0 || n_vals > MAX_OPT_VALUES) + return -1; + + /* duplicate configuration string before splitting it to tokens */ + string = strdup(conf_str); + if (string == NULL) + return -1; + + n_tokens = rte_strsplit(string, strnlen(string, 32), tokens, n_vals, separator); + + for(i = 0; i < n_tokens; i++) { + opt_vals[i] = (uint32_t)atol(tokens[i]); + } + + free(string); + + return n_tokens; +} + +static int +app_parse_ring_conf(const char *conf_str) +{ + int ret; + uint32_t vals[3]; + + ret = app_parse_opt_vals(conf_str, ',', 3, vals); + if (ret != 3) + return ret; + + ring_conf.rx_size = vals[0]; + ring_conf.ring_size = vals[1]; + ring_conf.tx_size = vals[2]; + + return 0; +} + +static int +app_parse_rth_conf(const char *conf_str) +{ + int ret; + uint32_t vals[3]; + + ret = app_parse_opt_vals(conf_str, ',', 3, vals); + if (ret != 3) + return ret; + + rx_thresh.pthresh = (uint8_t)vals[0]; + rx_thresh.hthresh = (uint8_t)vals[1]; + rx_thresh.wthresh = (uint8_t)vals[2]; + + return 0; +} + +static int +app_parse_tth_conf(const char *conf_str) +{ + int ret; + uint32_t vals[3]; + + ret = app_parse_opt_vals(conf_str, ',', 3, vals); + if (ret != 3) + return ret; + + tx_thresh.pthresh = (uint8_t)vals[0]; + tx_thresh.hthresh = (uint8_t)vals[1]; + tx_thresh.wthresh = (uint8_t)vals[2]; + + return 0; +} + +static int +app_parse_flow_conf(const char *conf_str) +{ + int ret; + uint32_t vals[5]; + struct flow_conf *pconf; + uint64_t mask; + + ret = app_parse_opt_vals(conf_str, ',', 6, vals); + if (ret < 4 || ret > 5) + return ret; + + pconf = &qos_conf[nb_pfc]; + + pconf->rx_port = (uint8_t)vals[0]; + pconf->tx_port = (uint8_t)vals[1]; + pconf->rx_core = (uint8_t)vals[2]; + pconf->wt_core = (uint8_t)vals[3]; + if (ret == 5) + pconf->tx_core = (uint8_t)vals[4]; + else + pconf->tx_core = pconf->wt_core; + + if (pconf->rx_core == pconf->wt_core) { + RTE_LOG(ERR, APP, "pfc %u: rx 
thread and worker thread cannot share same core\n", nb_pfc); + return -1; + } + + if (pconf->rx_port >= RTE_MAX_ETHPORTS) { + RTE_LOG(ERR, APP, "pfc %u: invalid rx port %hu index\n", nb_pfc, pconf->rx_port); + return -1; + } + if (pconf->tx_port >= RTE_MAX_ETHPORTS) { + RTE_LOG(ERR, APP, "pfc %u: invalid tx port %hu index\n", nb_pfc, pconf->rx_port); + return -1; + } + + mask = 1lu << pconf->rx_port; + if (app_used_rx_port_mask & mask) { + RTE_LOG(ERR, APP, "pfc %u: rx port %hu is used already\n", nb_pfc, pconf->rx_port); + return -1; + } + app_used_rx_port_mask |= mask; + app_used_port_mask |= mask; + + mask = 1lu << pconf->tx_port; + if (app_used_tx_port_mask & mask) { + RTE_LOG(ERR, APP, "pfc %u: port %hu is used already\n", nb_pfc, pconf->tx_port); + return -1; + } + app_used_tx_port_mask |= mask; + app_used_port_mask |= mask; + + mask = 1lu << pconf->rx_core; + app_used_core_mask |= mask; + + mask = 1lu << pconf->wt_core; + app_used_core_mask |= mask; + + mask = 1lu << pconf->tx_core; + app_used_core_mask |= mask; + + nb_pfc++; + + return 0; +} + +static int +app_parse_burst_conf(const char *conf_str) +{ + int ret; + uint32_t vals[4]; + + ret = app_parse_opt_vals(conf_str, ',', 4, vals); + if (ret != 4) + return ret; + + burst_conf.rx_burst = (uint16_t)vals[0]; + burst_conf.ring_burst = (uint16_t)vals[1]; + burst_conf.qos_dequeue = (uint16_t)vals[2]; + burst_conf.tx_burst = (uint16_t)vals[3]; + + return 0; +} + +/* + * Parses the argument given in the command line of the application, + * calculates mask for used cores and initializes EAL with calculated core mask + */ +int +app_parse_args(int argc, char **argv) +{ + int opt, ret; + int option_index; + const char *optname; + char *prgname = argv[0]; + uint32_t i, nb_lcores; + + static struct option lgopts[] = { + { "pfc", 1, 0, 0 }, + { "mst", 1, 0, 0 }, + { "rsz", 1, 0, 0 }, + { "bsz", 1, 0, 0 }, + { "rth", 1, 0, 0 }, + { "tth", 1, 0, 0 }, + { "cfg", 1, 0, 0 }, + { NULL, 0, 0, 0 } + }; + + /* initialize EAL first */ + ret = rte_eal_init(argc, argv); + if (ret < 0) + return -1; + + argc -= ret; + argv += ret; + + /* set en_US locale to print big numbers with ',' */ + setlocale(LC_NUMERIC, "en_US.utf-8"); + + while ((opt = getopt_long(argc, argv, "", + lgopts, &option_index)) != EOF) { + + switch (opt) { + /* long options */ + case 0: + optname = lgopts[option_index].name; + if (str_is(optname, "pfc")) { + ret = app_parse_flow_conf(optarg); + if (ret) { + RTE_LOG(ERR, APP, "Invalid pipe configuration %s\n", optarg); + return -1; + } + break; + } + if (str_is(optname, "mst")) { + app_master_core = (uint32_t)atoi(optarg); + break; + } + if (str_is(optname, "rsz")) { + ret = app_parse_ring_conf(optarg); + if (ret) { + RTE_LOG(ERR, APP, "Invalid ring configuration %s\n", optarg); + return -1; + } + break; + } + if (str_is(optname, "bsz")) { + ret = app_parse_burst_conf(optarg); + if (ret) { + RTE_LOG(ERR, APP, "Invalid burst configuration %s\n", optarg); + return -1; + } + break; + } + if (str_is(optname, "rth")) { + ret = app_parse_rth_conf(optarg); + if (ret) { + RTE_LOG(ERR, APP, "Invalid RX threshold configuration %s\n", optarg); + return -1; + } + break; + } + if (str_is(optname, "tth")) { + ret = app_parse_tth_conf(optarg); + if (ret) { + RTE_LOG(ERR, APP, "Invalid TX threshold configuration %s\n", optarg); + return -1; + } + break; + } + if (str_is(optname, "cfg")) { + cfg_profile = optarg; + break; + } + break; + + default: + app_usage(prgname); + return -1; + } + } + + /* check master core index validity */ + for(i = 0; i <= 
app_master_core; i++) { + if (app_used_core_mask & (1u << app_master_core)) { + RTE_LOG(ERR, APP, "Master core index is not configured properly\n"); + app_usage(prgname); + return -1; + } + } + app_used_core_mask |= 1u << app_master_core; + + if ((app_used_core_mask != app_eal_core_mask()) || + (app_master_core != rte_get_master_lcore())) { + RTE_LOG(ERR, APP, "EAL core mask not configured properly, must be %" PRIx64 + " instead of %" PRIx64 "\n" , app_used_core_mask, app_eal_core_mask()); + return -1; + } + + if (nb_pfc == 0) { + RTE_LOG(ERR, APP, "Packet flow not configured!\n"); + app_usage(prgname); + return -1; + } + + /* sanity check for cores assignment */ + nb_lcores = app_cpu_core_count(); + + for(i = 0; i < nb_pfc; i++) { + if (qos_conf[i].rx_core >= nb_lcores) { + RTE_LOG(ERR, APP, "pfc %u: invalid RX lcore index %u\n", i + 1, + qos_conf[i].rx_core); + return -1; + } + if (qos_conf[i].wt_core >= nb_lcores) { + RTE_LOG(ERR, APP, "pfc %u: invalid WT lcore index %u\n", i + 1, + qos_conf[i].wt_core); + return -1; + } + uint32_t rx_sock = rte_lcore_to_socket_id(qos_conf[i].rx_core); + uint32_t wt_sock = rte_lcore_to_socket_id(qos_conf[i].wt_core); + if (rx_sock != wt_sock) { + RTE_LOG(ERR, APP, "pfc %u: RX and WT must be on the same socket\n", i + 1); + return -1; + } + app_numa_mask |= 1 << rte_lcore_to_socket_id(qos_conf[i].rx_core); + } + + return 0; +} + diff --git a/examples/qos_sched/cfg_file.c b/examples/qos_sched/cfg_file.c new file mode 100755 index 0000000000..85f9c02635 --- /dev/null +++ b/examples/qos_sched/cfg_file.c @@ -0,0 +1,631 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2013 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
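With argument parsing done, the profile named by --cfg is consumed through the small API declared in cfg_file.h. Roughly the intended call sequence, sketched with a section and entry taken from the sample profile.cfg and error handling trimmed:

	#include <stdio.h>
	#include "cfg_file.h"

	static int
	show_frame_overhead(const char *profile)
	{
		struct cfg_file *cfg = cfg_load(profile, 0);
		const char *entry;

		if (cfg == NULL)
			return -1;

		if (cfg_has_section(cfg, "port")) {
			entry = cfg_get_entry(cfg, "port", "frame overhead");
			if (entry != NULL)
				printf("frame overhead = %s\n", entry);
		}

		return cfg_close(cfg);
	}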
+ * + */ + +#include +#include +#include +#include +#include +#include + +#include "cfg_file.h" +#include "main.h" + + +/** when we resize a file structure, how many extra entries + * for new sections do we add in */ +#define CFG_ALLOC_SECTION_BATCH 8 +/** when we resize a section structure, how many extra entries + * for new entries do we add in */ +#define CFG_ALLOC_ENTRY_BATCH 16 + +static unsigned +_strip(char *str, unsigned len) +{ + int newlen = len; + if (len == 0) + return 0; + + if (isspace(str[len-1])) { + /* strip trailing whitespace */ + while (newlen > 0 && isspace(str[newlen - 1])) + str[--newlen] = '\0'; + } + + if (isspace(str[0])) { + /* strip leading whitespace */ + int i,start = 1; + while (isspace(str[start]) && start < newlen) + start++ + ; /* do nothing */ + newlen -= start; + for (i = 0; i < newlen; i++) + str[i] = str[i+start]; + str[i] = '\0'; + } + return newlen; +} + +struct cfg_file * +cfg_load(const char *filename, int flags) +{ + int allocated_sections = CFG_ALLOC_SECTION_BATCH; + int allocated_entries = 0; + int curr_section = -1; + int curr_entry = -1; + char buffer[256]; + int lineno = 0; + struct cfg_file *cfg = NULL; + + FILE *f = fopen(filename, "r"); + if (f == NULL) + return NULL; + + cfg = malloc(sizeof(*cfg) + sizeof(cfg->sections[0]) * allocated_sections); + if (cfg == NULL) + goto error2; + + memset(cfg->sections, 0, sizeof(cfg->sections[0]) * allocated_sections); + + while (fgets(buffer, sizeof(buffer), f) != NULL) { + char *pos = NULL; + size_t len = strnlen(buffer, sizeof(buffer)); + lineno++; + if (len >=sizeof(buffer) - 1 && buffer[len-1] != '\n'){ + printf("Error line %d - no \\n found on string. " + "Check if line too long\n", lineno); + goto error1; + } + if ((pos = memchr(buffer, ';', sizeof(buffer))) != NULL) { + *pos = '\0'; + len = pos - buffer; + } + + len = _strip(buffer, len); + if (buffer[0] != '[' && memchr(buffer, '=', len) == NULL) + continue; + + if (buffer[0] == '[') { + /* section heading line */ + char *end = memchr(buffer, ']', len); + if (end == NULL) { + printf("Error line %d - no terminating '[' found\n", lineno); + goto error1; + } + *end = '\0'; + _strip(&buffer[1], end - &buffer[1]); + + /* close off old section and add start new one */ + if (curr_section >= 0) + cfg->sections[curr_section]->num_entries = curr_entry + 1; + curr_section++; + + /* resize overall struct if we don't have room for more sections */ + if (curr_section == allocated_sections) { + allocated_sections += CFG_ALLOC_SECTION_BATCH; + struct cfg_file *n_cfg = realloc(cfg, sizeof(*cfg) + + sizeof(cfg->sections[0]) * allocated_sections); + if (n_cfg == NULL) { + printf("Error - no more memory\n"); + goto error1; + } + cfg = n_cfg; + } + + /* allocate space for new section */ + allocated_entries = CFG_ALLOC_ENTRY_BATCH; + curr_entry = -1; + cfg->sections[curr_section] = malloc(sizeof(*cfg->sections[0]) + + sizeof(cfg->sections[0]->entries[0]) * allocated_entries); + if (cfg->sections[curr_section] == NULL) { + printf("Error - no more memory\n"); + goto error1; + } + + rte_snprintf(cfg->sections[curr_section]->name, + sizeof(cfg->sections[0]->name), + "%s", &buffer[1]); + } + else { + /* value line */ + if (curr_section < 0) { + printf("Error line %d - value outside of section\n", lineno); + goto error1; + } + + struct cfg_section *sect = cfg->sections[curr_section]; + char *split[2]; + if (rte_strsplit(buffer, sizeof(buffer), split, 2, '=') != 2) { + printf("Error at line %d - cannot split string\n", lineno); + goto error1; + } + + curr_entry++; + if 
(curr_entry == allocated_entries) { + allocated_entries += CFG_ALLOC_ENTRY_BATCH; + struct cfg_section *n_sect = realloc(sect, sizeof(*sect) + + sizeof(sect->entries[0]) * allocated_entries); + if (n_sect == NULL) { + printf("Error - no more memory\n"); + goto error1; + } + sect = cfg->sections[curr_section] = n_sect; + } + + sect->entries[curr_entry] = malloc(sizeof(*sect->entries[0])); + if (sect->entries[curr_entry] == NULL) { + printf("Error - no more memory\n"); + goto error1; + } + + struct cfg_entry *entry = sect->entries[curr_entry]; + rte_snprintf(entry->name, sizeof(entry->name), "%s", split[0]); + rte_snprintf(entry->value, sizeof(entry->value), "%s", split[1]); + _strip(entry->name, strnlen(entry->name, sizeof(entry->name))); + _strip(entry->value, strnlen(entry->value, sizeof(entry->value))); + } + } + fclose(f); + cfg->flags = flags; + cfg->sections[curr_section]->num_entries = curr_entry + 1; + cfg->num_sections = curr_section + 1; + return cfg; + +error1: + cfg_close(cfg); +error2: + fclose(f); + return NULL; +} + + +int cfg_close(struct cfg_file *cfg) +{ + int i, j; + + if (cfg == NULL) + return -1; + + for(i = 0; i < cfg->num_sections; i++) { + if (cfg->sections[i] != NULL) { + if (cfg->sections[i]->num_entries) { + for(j = 0; j < cfg->sections[i]->num_entries; j++) { + if (cfg->sections[i]->entries[j] != NULL) + free(cfg->sections[i]->entries[j]); + } + } + free(cfg->sections[i]); + } + } + free(cfg); + + return 0; +} + +int +cfg_num_sections(struct cfg_file *cfg, const char *sectionname, size_t length) +{ + int i; + int num_sections = 0; + for (i = 0; i < cfg->num_sections; i++) { + if (strncmp(cfg->sections[i]->name, sectionname, length) == 0) + num_sections++; + } + return num_sections; +} + +int +cfg_sections(struct cfg_file *cfg, char *sections[], int max_sections) +{ + int i; + for (i = 0; i < cfg->num_sections && i < max_sections; i++) { + rte_snprintf(sections[i], CFG_NAME_LEN, "%s", cfg->sections[i]->name); + } + return i; +} + +static const struct cfg_section * +_get_section(struct cfg_file *cfg, const char *sectionname) +{ + int i; + for (i = 0; i < cfg->num_sections; i++) { + if (strncmp(cfg->sections[i]->name, sectionname, + sizeof(cfg->sections[0]->name)) == 0) + return cfg->sections[i]; + } + return NULL; +} + +int +cfg_has_section(struct cfg_file *cfg, const char *sectionname) +{ + return (_get_section(cfg, sectionname) != NULL); +} + +int +cfg_section_num_entries(struct cfg_file *cfg, const char *sectionname) +{ + const struct cfg_section *s = _get_section(cfg, sectionname); + if (s == NULL) + return -1; + return s->num_entries; +} + + +int +cfg_section_entries(struct cfg_file *cfg, const char *sectionname, + struct cfg_entry *entries, int max_entries) +{ + int i; + const struct cfg_section *sect = _get_section(cfg, sectionname); + if (sect == NULL) + return -1; + for (i = 0; i < max_entries && i < sect->num_entries; i++) + entries[i] = *sect->entries[i]; + return i; +} + +const char * +cfg_get_entry(struct cfg_file *cfg, const char *sectionname, + const char *entryname) +{ + int i; + const struct cfg_section *sect = _get_section(cfg, sectionname); + if (sect == NULL) + return NULL; + for (i = 0; i < sect->num_entries; i++) + if (strncmp(sect->entries[i]->name, entryname, CFG_NAME_LEN) == 0) + return sect->entries[i]->value; + return NULL; +} + +int +cfg_has_entry(struct cfg_file *cfg, const char *sectionname, + const char *entryname) +{ + return (cfg_get_entry(cfg, sectionname, entryname) != NULL); +} + + +int +cfg_load_port(struct cfg_file *cfg, struct 
rte_sched_port_params *port_params) +{ + const char *entry; + int j; + + if (!cfg || !port_params) + return -1; + + entry = cfg_get_entry(cfg, "port", "frame overhead"); + if (entry) + port_params->frame_overhead = (uint32_t)atoi(entry); + + entry = cfg_get_entry(cfg, "port", "number of subports per port"); + if (entry) + port_params->n_subports_per_port = (uint32_t)atoi(entry); + + entry = cfg_get_entry(cfg, "port", "number of pipes per subport"); + if (entry) + port_params->n_pipes_per_subport = (uint32_t)atoi(entry); + + entry = cfg_get_entry(cfg, "port", "queue sizes"); + if (entry) { + char *next; + + for(j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) { + port_params->qsize[j] = (uint16_t)strtol(entry, &next, 10); + if (next == NULL) + break; + entry = next; + } + } + +#ifdef RTE_SCHED_RED + for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) { + char str[32]; + + /* Parse WRED min thresholds */ + rte_snprintf(str, sizeof(str), "tc %d wred min", j); + entry = cfg_get_entry(cfg, "red", str); + if (entry) { + char *next; + int k; + /* for each packet colour (green, yellow, red) */ + for (k = 0; k < e_RTE_METER_COLORS; k++) { + port_params->red_params[j][k].min_th + = (uint16_t)strtol(entry, &next, 10); + if (next == NULL) + break; + entry = next; + } + } + + /* Parse WRED max thresholds */ + rte_snprintf(str, sizeof(str), "tc %d wred max", j); + entry = cfg_get_entry(cfg, "red", str); + if (entry) { + char *next; + int k; + /* for each packet colour (green, yellow, red) */ + for (k = 0; k < e_RTE_METER_COLORS; k++) { + port_params->red_params[j][k].max_th + = (uint16_t)strtol(entry, &next, 10); + if (next == NULL) + break; + entry = next; + } + } + + /* Parse WRED inverse mark probabilities */ + rte_snprintf(str, sizeof(str), "tc %d wred inv prob", j); + entry = cfg_get_entry(cfg, "red", str); + if (entry) { + char *next; + int k; + /* for each packet colour (green, yellow, red) */ + for (k = 0; k < e_RTE_METER_COLORS; k++) { + port_params->red_params[j][k].maxp_inv + = (uint8_t)strtol(entry, &next, 10); + + if (next == NULL) + break; + entry = next; + } + } + + /* Parse WRED EWMA filter weights */ + rte_snprintf(str, sizeof(str), "tc %d wred weight", j); + entry = cfg_get_entry(cfg, "red", str); + if (entry) { + char *next; + int k; + /* for each packet colour (green, yellow, red) */ + for (k = 0; k < e_RTE_METER_COLORS; k++) { + port_params->red_params[j][k].wq_log2 + = (uint8_t)strtol(entry, &next, 10); + if (next == NULL) + break; + entry = next; + } + } + } +#endif /* RTE_SCHED_RED */ + + return 0; +} + +int +cfg_load_pipe(struct cfg_file *cfg, struct rte_sched_pipe_params *pipe_params) +{ + int i, j; + char *next; + const char *entry; + int profiles; + + if (!cfg || !pipe_params) + return -1; + + profiles = cfg_num_sections(cfg, "pipe profile", sizeof("pipe profile") - 1); + port_params.n_pipe_profiles = profiles; + + for (j = 0; j < profiles; j++) { + char pipe_name[32]; + rte_snprintf(pipe_name, sizeof(pipe_name), "pipe profile %d", j); + + entry = cfg_get_entry(cfg, pipe_name, "tb rate"); + if (entry) + pipe_params[j].tb_rate = (uint32_t)atoi(entry); + + entry = cfg_get_entry(cfg, pipe_name, "tb size"); + if (entry) + pipe_params[j].tb_size = (uint32_t)atoi(entry); + + entry = cfg_get_entry(cfg, pipe_name, "tc period"); + if (entry) + pipe_params[j].tc_period = (uint32_t)atoi(entry); + + entry = cfg_get_entry(cfg, pipe_name, "tc 0 rate"); + if (entry) + pipe_params[j].tc_rate[0] = (uint32_t)atoi(entry); + + entry = cfg_get_entry(cfg, pipe_name, "tc 1 rate"); + if 
(entry) + pipe_params[j].tc_rate[1] = (uint32_t)atoi(entry); + + entry = cfg_get_entry(cfg, pipe_name, "tc 2 rate"); + if (entry) + pipe_params[j].tc_rate[2] = (uint32_t)atoi(entry); + + entry = cfg_get_entry(cfg, pipe_name, "tc 3 rate"); + if (entry) + pipe_params[j].tc_rate[3] = (uint32_t)atoi(entry); + +#ifdef RTE_SCHED_SUBPORT_TC_OV + entry = cfg_get_entry(cfg, pipe_name, "tc 0 oversubscription weight"); + if (entry) + pipe_params[j].tc_ov_weight[0] = (uint8_t)atoi(entry); + + entry = cfg_get_entry(cfg, pipe_name, "tc 1 oversubscription weight"); + if (entry) + pipe_params[j].tc_ov_weight[1] = (uint8_t)atoi(entry); + + entry = cfg_get_entry(cfg, pipe_name, "tc 2 oversubscription weight"); + if (entry) + pipe_params[j].tc_ov_weight[2] = (uint8_t)atoi(entry); + + entry = cfg_get_entry(cfg, pipe_name, "tc 3 oversubscription weight"); + if (entry) + pipe_params[j].tc_ov_weight[3] = (uint8_t)atoi(entry); +#endif + + entry = cfg_get_entry(cfg, pipe_name, "tc 0 wrr weights"); + if (entry) { + for(i = 0; i < RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; i++) { + pipe_params[j].wrr_weights[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE*0 + i] = + (uint8_t)strtol(entry, &next, 10); + if (next == NULL) + break; + entry = next; + } + } + entry = cfg_get_entry(cfg, pipe_name, "tc 1 wrr weights"); + if (entry) { + for(i = 0; i < RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; i++) { + pipe_params[j].wrr_weights[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE*1 + i] = + (uint8_t)strtol(entry, &next, 10); + if (next == NULL) + break; + entry = next; + } + } + entry = cfg_get_entry(cfg, pipe_name, "tc 2 wrr weights"); + if (entry) { + for(i = 0; i < RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; i++) { + pipe_params[j].wrr_weights[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE*2 + i] = + (uint8_t)strtol(entry, &next, 10); + if (next == NULL) + break; + entry = next; + } + } + entry = cfg_get_entry(cfg, pipe_name, "tc 3 wrr weights"); + if (entry) { + for(i = 0; i < RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; i++) { + pipe_params[j].wrr_weights[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE*3 + i] = + (uint8_t)strtol(entry, &next, 10); + if (next == NULL) + break; + entry = next; + } + } + } + return 0; +} + +int +cfg_load_subport(struct cfg_file *cfg, struct rte_sched_subport_params *subport_params) +{ + const char *entry; + int i, j, k; + + if (!cfg || !subport_params) + return -1; + + memset(app_pipe_to_profile, -1, sizeof(app_pipe_to_profile)); + + for (i = 0; i < MAX_SCHED_SUBPORTS; i++) { + char sec_name[CFG_NAME_LEN]; + rte_snprintf(sec_name, sizeof(sec_name), "subport %d", i); + + if (cfg_has_section(cfg, sec_name)) { + entry = cfg_get_entry(cfg, sec_name, "tb rate"); + if (entry) + subport_params[i].tb_rate = (uint32_t)atoi(entry); + + entry = cfg_get_entry(cfg, sec_name, "tb size"); + if (entry) + subport_params[i].tb_size = (uint32_t)atoi(entry); + + entry = cfg_get_entry(cfg, sec_name, "tc period"); + if (entry) + subport_params[i].tc_period = (uint32_t)atoi(entry); + +#ifdef RTE_SCHED_SUBPORT_TC_OV + entry = cfg_get_entry(cfg, sec_name, "tc oversubscription period"); + if (entry) + subport_params[i].tc_ov_period = (uint32_t)atoi(entry); +#endif + + entry = cfg_get_entry(cfg, sec_name, "tc 0 rate"); + if (entry) + subport_params[i].tc_rate[0] = (uint32_t)atoi(entry); + + entry = cfg_get_entry(cfg, sec_name, "tc 1 rate"); + if (entry) + subport_params[i].tc_rate[1] = (uint32_t)atoi(entry); + + entry = cfg_get_entry(cfg, sec_name, "tc 2 rate"); + if (entry) + subport_params[i].tc_rate[2] = (uint32_t)atoi(entry); + + entry = cfg_get_entry(cfg, sec_name, "tc 3 rate"); + if (entry) + 
subport_params[i].tc_rate[3] = (uint32_t)atoi(entry); + + int n_entries = cfg_section_num_entries(cfg, sec_name); + struct cfg_entry entries[n_entries]; + + cfg_section_entries(cfg, sec_name, entries, n_entries); + + for (j = 0; j < n_entries; j++) { + if (strncmp("pipe", entries[j].name, sizeof("pipe") - 1) == 0) { + int profile; + char *tokens[2] = {NULL, NULL}; + int n_tokens; + int begin, end; + + profile = atoi(entries[j].value); + n_tokens = rte_strsplit(&entries[j].name[sizeof("pipe")], + strnlen(entries[j].name, CFG_NAME_LEN), tokens, 2, '-'); + + begin = atoi(tokens[0]); + if (n_tokens == 2) + end = atoi(tokens[1]); + else + end = begin; + + if (end >= MAX_SCHED_PIPES || begin > end) + return -1; + + for (k = begin; k <= end; k++) { + char profile_name[CFG_NAME_LEN]; + + rte_snprintf(profile_name, sizeof(profile_name), + "pipe profile %d", profile); + if (cfg_has_section(cfg, profile_name)) + app_pipe_to_profile[i][k] = profile; + else + rte_exit(EXIT_FAILURE, "Wrong pipe profile %s\n", + entries[j].value); + + } + } + } + } + } + + return 0; +} + + diff --git a/examples/qos_sched/cfg_file.h b/examples/qos_sched/cfg_file.h new file mode 100755 index 0000000000..2e265e689b --- /dev/null +++ b/examples/qos_sched/cfg_file.h @@ -0,0 +1,103 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2013 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#ifndef __CFG_FILE_H__ +#define __CFG_FILE_H__ + +#include + +#define CFG_NAME_LEN 32 +#define CFG_VALUE_LEN 64 + +struct cfg_entry { + char name[CFG_NAME_LEN]; + char value[CFG_VALUE_LEN]; +}; + +struct cfg_section { + char name[CFG_NAME_LEN]; + int num_entries; + struct cfg_entry *entries[0]; +}; + +struct cfg_file { + int flags; + int num_sections; + struct cfg_section *sections[0]; +}; + + +int cfg_load_port(struct cfg_file *cfg, struct rte_sched_port_params *port); + +int cfg_load_pipe(struct cfg_file *cfg, struct rte_sched_pipe_params *pipe); + +int cfg_load_subport(struct cfg_file *cfg, struct rte_sched_subport_params *subport); + + +/* reads a config file from disk and returns a handle to the config + * 'flags' is reserved for future use and must be 0 + */ +struct cfg_file *cfg_load(const char *filename, int flags); + +/* returns the number of sections in the config */ +int cfg_num_sections(struct cfg_file *cfg, const char *sec_name, size_t length); + +/* fills the array "sections" with the names of all the sections in the file + * (up to a max of max_sections). + * NOTE: buffers in the sections array must be at least CFG_NAME_LEN big + */ +int cfg_sections(struct cfg_file *cfg, char *sections[], int max_sections); + +/* true if the named section exists, false otherwise */ +int cfg_has_section(struct cfg_file *cfg, const char *sectionname); + +/* returns the number of entries in a section */ +int cfg_section_num_entries(struct cfg_file *cfg, const char *sectionname); + +/* returns the entries in a section as key-value pairs in the "entries" array */ +int cfg_section_entries(struct cfg_file *cfg, const char *sectionname, + struct cfg_entry *entries, int max_entries); + +/* returns a pointer to the value of the named entry in the named section */ +const char *cfg_get_entry(struct cfg_file *cfg, const char *sectionname, + const char *entryname); + +/* true if the given entry exists in the given section, false otherwise */ +int cfg_has_entry(struct cfg_file *cfg, const char *sectionname, + const char *entryname); + +/* cleans up memory allocated by cfg_load() */ +int cfg_close(struct cfg_file *cfg); + +#endif diff --git a/examples/qos_sched/init.c b/examples/qos_sched/init.c new file mode 100755 index 0000000000..1654c73504 --- /dev/null +++ b/examples/qos_sched/init.c @@ -0,0 +1,385 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2013 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
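Both cfg_file.h structures above end in a zero-length array, so a section header and its entry pointers share one allocation that cfg_load() grows in batches via realloc(). A stripped-down sketch of that growth step (batch size arbitrary):

	#include <stdlib.h>
	#include "cfg_file.h"

	#define BATCH 16

	/* Grow the flexible entries[] tail of a section by one batch. */
	static struct cfg_section *
	grow_section(struct cfg_section *sect, int *allocated)
	{
		struct cfg_section *tmp;

		tmp = realloc(sect, sizeof(*sect) +
				sizeof(sect->entries[0]) * (*allocated + BATCH));
		if (tmp == NULL)
			return NULL;	/* old block is still valid, caller keeps sect */

		*allocated += BATCH;
		return tmp;
	}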
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "main.h" +#include "cfg_file.h" + +uint32_t app_numa_mask = 0; +static uint32_t app_inited_port_mask = 0; + +int app_pipe_to_profile[MAX_SCHED_SUBPORTS][MAX_SCHED_PIPES]; + +#define MAX_NAME_LEN 32 + +struct ring_conf ring_conf = { + .rx_size = APP_RX_DESC_DEFAULT, + .ring_size = APP_RING_SIZE, + .tx_size = APP_TX_DESC_DEFAULT, +}; + +struct burst_conf burst_conf = { + .rx_burst = MAX_PKT_RX_BURST, + .ring_burst = PKT_ENQUEUE, + .qos_dequeue = PKT_DEQUEUE, + .tx_burst = MAX_PKT_TX_BURST, +}; + +struct ring_thresh rx_thresh = { + .pthresh = RX_PTHRESH, + .hthresh = RX_HTHRESH, + .wthresh = RX_WTHRESH, +}; + +struct ring_thresh tx_thresh = { + .pthresh = TX_PTHRESH, + .hthresh = TX_HTHRESH, + .wthresh = TX_WTHRESH, +}; + +uint32_t nb_pfc; +const char *cfg_profile = NULL; +struct flow_conf qos_conf[MAX_DATA_STREAMS]; + +static const struct rte_eth_conf port_conf = { + .rxmode = { + .max_rx_pkt_len = ETHER_MAX_LEN, + .split_hdr_size = 0, + .header_split = 0, /**< Header Split disabled */ + .hw_ip_checksum = 0, /**< IP checksum offload disabled */ + .hw_vlan_filter = 0, /**< VLAN filtering disabled */ + .jumbo_frame = 0, /**< Jumbo Frame Support disabled */ + .hw_strip_crc = 0, /**< CRC stripped by hardware */ + }, + .txmode = { + .mq_mode = ETH_DCB_NONE, + }, +}; + +static int +app_init_port(uint8_t portid, struct rte_mempool *mp) +{ + int ret; + struct rte_eth_link link; + struct rte_eth_rxconf rx_conf; + struct rte_eth_txconf tx_conf; + + /* check if port already initialized (multistream configuration) */ + if (app_inited_port_mask & (1u << portid)) + return 0; + + rx_conf.rx_thresh.pthresh = rx_thresh.pthresh; + rx_conf.rx_thresh.hthresh = rx_thresh.hthresh; + rx_conf.rx_thresh.wthresh = rx_thresh.wthresh; + rx_conf.rx_free_thresh = 32; + rx_conf.rx_drop_en = 0; + + tx_conf.tx_thresh.pthresh = tx_thresh.pthresh; + tx_conf.tx_thresh.hthresh = tx_thresh.hthresh; + tx_conf.tx_thresh.wthresh = tx_thresh.wthresh; + tx_conf.tx_free_thresh = 0; + tx_conf.tx_rs_thresh = 0; + tx_conf.txq_flags = ETH_TXQ_FLAGS_NOMULTSEGS | ETH_TXQ_FLAGS_NOOFFLOADS; + + /* init port */ + RTE_LOG(INFO, APP, "Initializing port %hu... 
", portid); + fflush(stdout); + ret = rte_eth_dev_configure(portid, 1, 1, &port_conf); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Cannot configure device: err=%d, port=%hu\n", + ret, portid); + + /* init one RX queue */ + fflush(stdout); + ret = rte_eth_rx_queue_setup(portid, 0, (uint16_t)ring_conf.rx_size, + rte_eth_dev_socket_id(portid), &rx_conf, mp); + if (ret < 0) + rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup: err=%d, port=%hu\n", + ret, portid); + + /* init one TX queue */ + fflush(stdout); + ret = rte_eth_tx_queue_setup(portid, 0, + (uint16_t)ring_conf.tx_size, rte_eth_dev_socket_id(portid), &tx_conf); + if (ret < 0) + rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup: err=%d, " + "port=%hu queue=%d\n", + ret, portid, 0); + + /* Start device */ + ret = rte_eth_dev_start(portid); + if (ret < 0) + rte_exit(EXIT_FAILURE, "rte_pmd_port_start: err=%d, port=%hu\n", + ret, portid); + + printf("done: "); + + /* get link status */ + rte_eth_link_get(portid, &link); + if (link.link_status) { + printf(" Link Up - speed %u Mbps - %s\n", + (uint32_t) link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? + ("full-duplex") : ("half-duplex\n")); + } else { + printf(" Link Down\n"); + } + rte_eth_promiscuous_enable(portid); + + /* mark port as initialized */ + app_inited_port_mask |= 1u << portid; + + return 0; +} + +static struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = { + { + .tb_rate = 1250000000, + .tb_size = 1000000, + + .tc_rate = {1250000000, 1250000000, 1250000000, 1250000000}, + .tc_period = 10, +#ifdef RTE_SCHED_SUBPORT_TC_OV + .tc_ov_period = 10, +#endif + }, +}; + +static struct rte_sched_pipe_params pipe_profiles[RTE_SCHED_PIPE_PROFILES_PER_PORT] = { + { /* Profile #0 */ + .tb_rate = 305175, + .tb_size = 1000000, + + .tc_rate = {305175, 305175, 305175, 305175}, + .tc_period = 40, +#ifdef RTE_SCHED_SUBPORT_TC_OV + .tc_ov_weight = {1, 1, 1, 1}, +#endif + + .wrr_weights = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, + }, +}; + +struct rte_sched_port_params port_params = { + .name = "port_0", + .socket = 0, /* computed */ + .rate = 0, /* computed */ + .frame_overhead = RTE_SCHED_FRAME_OVERHEAD_DEFAULT, + .n_subports_per_port = 1, + .n_pipes_per_subport = 4096, + .qsize = {64, 64, 64, 64}, + .pipe_profiles = pipe_profiles, + .n_pipe_profiles = 1, + +#ifdef RTE_SCHED_RED + .red_params = { + /* Traffic Class 0 Colors Green / Yellow / Red */ + [0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9}, + [0][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9}, + [0][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9}, + + /* Traffic Class 1 - Colors Green / Yellow / Red */ + [1][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9}, + [1][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9}, + [1][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9}, + + /* Traffic Class 2 - Colors Green / Yellow / Red */ + [2][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9}, + [2][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9}, + [2][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9}, + + /* Traffic Class 3 - Colors Green / Yellow / Red */ + [3][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9}, + [3][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9}, + [3][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9} + } +#endif /* RTE_SCHED_RED */ +}; + +static struct rte_sched_port * +app_init_sched_port(uint32_t portid, uint32_t 
socketid) +{ + static char port_name[32]; /* static as referenced from global port_params*/ + struct rte_eth_link link; + struct rte_sched_port *port = NULL; + uint32_t pipe, subport; + int err; + + rte_eth_link_get((uint8_t)portid, &link); + + port_params.socket = socketid; + port_params.rate = (uint64_t) link.link_speed * 1000 * 1000 / 8; + rte_snprintf(port_name, sizeof(port_name), "port_%d", portid); + port_params.name = port_name; + + port = rte_sched_port_config(&port_params); + if (port == NULL){ + rte_exit(EXIT_FAILURE, "Unable to config sched port\n"); + } + + for (subport = 0; subport < port_params.n_subports_per_port; subport ++) { + err = rte_sched_subport_config(port, subport, &subport_params[subport]); + if (err) { + rte_exit(EXIT_FAILURE, "Unable to config sched subport %u, err=%d\n", + subport, err); + } + + for (pipe = 0; pipe < port_params.n_pipes_per_subport; pipe ++) { + if (app_pipe_to_profile[subport][pipe] != -1) { + err = rte_sched_pipe_config(port, subport, pipe, + app_pipe_to_profile[subport][pipe]); + if (err) { + rte_exit(EXIT_FAILURE, "Unable to config sched pipe %u " + "for profile %d, err=%d\n", pipe, + app_pipe_to_profile[subport][pipe], err); + } + } + } + } + + return port; +} + +static int +app_load_cfg_profile(const char *profile) +{ + if (profile == NULL) + return 0; + + struct cfg_file *cfg_file = cfg_load(profile, 0); + if (cfg_file == NULL) + rte_exit(EXIT_FAILURE, "Cannot load configuration profile %s\n", profile); + + cfg_load_port(cfg_file, &port_params); + cfg_load_subport(cfg_file, subport_params); + cfg_load_pipe(cfg_file, pipe_profiles); + + cfg_close(cfg_file); + + return 0; +} + +int app_init(void) +{ + uint32_t i; + char ring_name[MAX_NAME_LEN]; + char pool_name[MAX_NAME_LEN]; + + /* init driver(s) */ + if (rte_pmd_init_all() < 0) + rte_exit(EXIT_FAILURE, "Cannot init PMD\n"); + + if (rte_eal_pci_probe() < 0) + rte_exit(EXIT_FAILURE, "Cannot probe PCI\n"); + + if (rte_eth_dev_count() == 0) + rte_exit(EXIT_FAILURE, "No Ethernet port - bye\n"); + + /* load configuration profile */ + if (app_load_cfg_profile(cfg_profile) != 0) + rte_exit(EXIT_FAILURE, "Invalid configuration profile\n"); + + /* Initialize each active flow */ + for(i = 0; i < nb_pfc; i++) { + uint32_t socket = rte_lcore_to_socket_id(qos_conf[i].rx_core); + struct rte_ring *ring; + + rte_snprintf(ring_name, MAX_NAME_LEN, "ring-%u-%u", i, qos_conf[i].rx_core); + ring = rte_ring_lookup(ring_name); + if (ring == NULL) + qos_conf[i].rx_ring = rte_ring_create(ring_name, ring_conf.ring_size, + socket, RING_F_SP_ENQ | RING_F_SC_DEQ); + else + qos_conf[i].rx_ring = ring; + + rte_snprintf(ring_name, MAX_NAME_LEN, "ring-%u-%u", i, qos_conf[i].tx_core); + ring = rte_ring_lookup(ring_name); + if (ring == NULL) + qos_conf[i].tx_ring = rte_ring_create(ring_name, ring_conf.ring_size, + socket, RING_F_SP_ENQ | RING_F_SC_DEQ); + else + qos_conf[i].tx_ring = ring; + + + /* create the mbuf pools for each RX Port */ + rte_snprintf(pool_name, MAX_NAME_LEN, "mbuf_pool%u", i); + qos_conf[i].mbuf_pool = rte_mempool_create(pool_name, NB_MBUF, MBUF_SIZE, + burst_conf.rx_burst * 4, + sizeof(struct rte_pktmbuf_pool_private), + rte_pktmbuf_pool_init, NULL, + rte_pktmbuf_init, NULL, + rte_eth_dev_socket_id(qos_conf[i].rx_port), + 0); + if (qos_conf[i].mbuf_pool == NULL) + rte_exit(EXIT_FAILURE, "Cannot init mbuf pool for socket %u\n", i); + + //printf("MP = %d\n", rte_mempool_count(qos_conf[i].app_pktmbuf_pool)); + + app_init_port(qos_conf[i].rx_port, qos_conf[i].mbuf_pool); + 
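+		/* app_init_port() is guarded by app_inited_port_mask, so this
+		 * second call is a no-op when rx_port == tx_port; the TX port
+		 * also reuses the RX port's mbuf pool. */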
app_init_port(qos_conf[i].tx_port, qos_conf[i].mbuf_pool); + + qos_conf[i].sched_port = app_init_sched_port(qos_conf[i].rx_port, socket); + } + + RTE_LOG(INFO, APP, "time stamp clock running at %" PRIu64 " Hz\n", + rte_get_timer_hz()); + + RTE_LOG(INFO, APP, "Ring sizes: NIC RX = %u, Mempool = %d SW queue = %u," + "NIC TX = %u\n", ring_conf.rx_size, NB_MBUF, ring_conf.ring_size, + ring_conf.tx_size); + + RTE_LOG(INFO, APP, "Burst sizes: RX read = %hu, RX write = %hu,\n" + " Worker read/QoS enqueue = %hu,\n" + " QoS dequeue = %hu, Worker write = %hu\n", + burst_conf.rx_burst, burst_conf.ring_burst, burst_conf.ring_burst, + burst_conf.qos_dequeue, burst_conf.tx_burst); + + RTE_LOG(INFO, APP, "NIC thresholds RX (p = %hhu, h = %hhu, w = %hhu)," + "TX (p = %hhu, h = %hhu, w = %hhu)\n", + rx_thresh.pthresh, rx_thresh.hthresh, rx_thresh.wthresh, + tx_thresh.pthresh, tx_thresh.hthresh, tx_thresh.wthresh); + + return 0; +} diff --git a/examples/qos_sched/main.c b/examples/qos_sched/main.c new file mode 100755 index 0000000000..b6cbe3572a --- /dev/null +++ b/examples/qos_sched/main.c @@ -0,0 +1,246 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2013 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "main.h" + +#define APP_MODE_NONE 0 +#define APP_RX_MODE 1 +#define APP_WT_MODE 2 +#define APP_TX_MODE 4 + + +/* main processing loop */ +static int +app_main_loop(__attribute__((unused))void *dummy) +{ + uint32_t lcore_id; + uint32_t i, mode; + uint32_t rx_idx = 0; + uint32_t wt_idx = 0; + uint32_t tx_idx = 0; + struct thread_conf *rx_confs[MAX_DATA_STREAMS]; + struct thread_conf *wt_confs[MAX_DATA_STREAMS]; + struct thread_conf *tx_confs[MAX_DATA_STREAMS]; + + memset(rx_confs, 0, sizeof(rx_confs)); + memset(wt_confs, 0, sizeof(wt_confs)); + memset(tx_confs, 0, sizeof(tx_confs)); + + + mode = APP_MODE_NONE; + lcore_id = rte_lcore_id(); + + for (i = 0; i < nb_pfc; i++) { + struct flow_conf *flow = &qos_conf[i]; + + if (flow->rx_core == lcore_id) { + flow->rx_thread.rx_port = flow->rx_port; + flow->rx_thread.rx_ring = flow->rx_ring; + flow->rx_thread.rx_queue = flow->rx_queue; + + rx_confs[rx_idx++] = &flow->rx_thread; + + mode |= APP_RX_MODE; + } + if (flow->tx_core == lcore_id) { + flow->tx_thread.tx_port = flow->tx_port; + flow->tx_thread.tx_ring = flow->tx_ring; + flow->tx_thread.tx_queue = flow->tx_queue; + + tx_confs[tx_idx++] = &flow->tx_thread; + + mode |= APP_TX_MODE; + } + if (flow->wt_core == lcore_id) { + flow->wt_thread.rx_ring = flow->rx_ring; + flow->wt_thread.tx_ring = flow->tx_ring; + flow->wt_thread.tx_port = flow->tx_port; + flow->wt_thread.sched_port = flow->sched_port; + + wt_confs[wt_idx++] = &flow->wt_thread; + + mode |= APP_WT_MODE; + } + } + + if (mode == APP_MODE_NONE) { + RTE_LOG(INFO, APP, "lcore %u has nothing to do\n", lcore_id); + return -1; + } + + if (mode == (APP_RX_MODE | APP_WT_MODE)) { + RTE_LOG(INFO, APP, "lcore %u was configured for both RX and WT !!!\n", + lcore_id); + return -1; + } + + RTE_LOG(INFO, APP, "entering main loop on lcore %u\n", lcore_id); + /* initialize mbuf memory */ + if (mode == APP_RX_MODE) { + for (i = 0; i < rx_idx; i++) { + RTE_LOG(INFO, APP, "flow %u lcoreid %u reading port %hu\n", + i, lcore_id, rx_confs[i]->rx_port); + } + + app_rx_thread(rx_confs); + } + else if (mode == (APP_TX_MODE | APP_WT_MODE)) { + for (i = 0; i < wt_idx; i++) { + wt_confs[i]->m_table = rte_malloc("table_wt", sizeof(struct rte_mbuf *) + * burst_conf.tx_burst, CACHE_LINE_SIZE); + + if (wt_confs[i]->m_table == NULL) + rte_panic("flow %u unable to allocate memory buffer\n", i); + + RTE_LOG(INFO, APP, "flow %u lcoreid %u sched+write port %hu\n", + i, lcore_id, wt_confs[i]->tx_port); + } + + app_mixed_thread(wt_confs); + } + else if (mode == APP_TX_MODE) { + for (i = 0; i < tx_idx; i++) { + tx_confs[i]->m_table = rte_malloc("table_tx", sizeof(struct rte_mbuf *) + * burst_conf.tx_burst, CACHE_LINE_SIZE); + + if (tx_confs[i]->m_table == NULL) + rte_panic("flow %u unable to allocate memory buffer\n", i); + + RTE_LOG(INFO, APP, "flow %u lcoreid %u writing port %hu\n", + i, lcore_id, tx_confs[i]->tx_port); + } + + app_tx_thread(tx_confs); + } + else if (mode == APP_WT_MODE){ + for (i = 0; i < wt_idx; i++) { + RTE_LOG(INFO, APP, "flow %u lcoreid %u scheduling \n", i, lcore_id); + } + + app_worker_thread(wt_confs); + } + + return 0; +} + +static void +app_stat(void) +{ + uint32_t i; + struct rte_eth_stats stats; + static struct rte_eth_stats rx_stats[MAX_DATA_STREAMS]; + static struct rte_eth_stats tx_stats[MAX_DATA_STREAMS]; + + /* print statistics */ + for(i = 0; i < nb_pfc; i++) { + struct flow_conf *flow = &qos_conf[i]; + + 
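+		/* counters below are printed as deltas against the previous sample;
+		 * MAIN calls app_stat() once per second (see the sleep(1) loop below) */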
rte_eth_stats_get(flow->rx_port, &stats); + printf("\nRX port %hu: rx: %"PRIu64 " err: %"PRIu64 " no_mbuf: %"PRIu64 "\n", + flow->rx_port, + stats.ipackets - rx_stats[i].ipackets, + stats.ierrors - rx_stats[i].ierrors, + stats.rx_nombuf - rx_stats[i].rx_nombuf); + memcpy(&rx_stats[i], &stats, sizeof(stats)); + + rte_eth_stats_get(flow->tx_port, &stats); + printf("TX port %hu: tx: %" PRIu64 " err: %" PRIu64 "\n", + flow->tx_port, + stats.opackets - tx_stats[i].opackets, + stats.oerrors - tx_stats[i].oerrors); + memcpy(&tx_stats[i], &stats, sizeof(stats)); + + //printf("MP = %d\n", rte_mempool_count(conf->app_pktmbuf_pool)); + +#if APP_COLLECT_STAT + printf("-------+------------+------------+\n"); + printf(" | received | dropped |\n"); + printf("-------+------------+------------+\n"); + printf(" RX | %10" PRIu64 " | %10" PRIu64 " |\n", + flow->rx_thread.stat.nb_rx, + flow->rx_thread.stat.nb_drop); + printf("QOS+TX | %10" PRIu64 " | %10" PRIu64 " | pps: %"PRIu64 " \n", + flow->wt_thread.stat.nb_rx, + flow->wt_thread.stat.nb_drop, + flow->wt_thread.stat.nb_rx - flow->wt_thread.stat.nb_drop); + printf("-------+------------+------------+\n"); + + memset(&flow->rx_thread.stat, 0, sizeof(struct thread_stat)); + memset(&flow->wt_thread.stat, 0, sizeof(struct thread_stat)); +#endif + } +} + + + +int +MAIN(int argc, char **argv) +{ + int ret; + + ret = app_parse_args(argc, argv); + if (ret < 0) + return -1; + + ret = app_init(); + if (ret < 0) + return -1; + + + /* launch per-lcore init on every lcore */ + rte_eal_mp_remote_launch(app_main_loop, NULL, SKIP_MASTER); + + /* print statistics every second */ + while(1) { + sleep(1); + app_stat(); + } +} + + + diff --git a/examples/qos_sched/main.h b/examples/qos_sched/main.h new file mode 100755 index 0000000000..243064c51d --- /dev/null +++ b/examples/qos_sched/main.h @@ -0,0 +1,186 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2013 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#ifndef _MAIN_H_ +#define _MAIN_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +#ifdef RTE_EXEC_ENV_BAREMETAL +#error "Baremetal is not supported" +#else +#define MAIN main +#endif + +#define RTE_LOGTYPE_APP RTE_LOGTYPE_USER1 + +/* + * Configurable number of RX/TX ring descriptors + */ +#define APP_RX_DESC_DEFAULT 128 +#define APP_TX_DESC_DEFAULT 256 + +#define MBUF_SIZE (1528 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM) +#define APP_RING_SIZE (8*1024) +#define NB_MBUF (64*1024*32) + +#define MAX_PKT_RX_BURST 64 +#define PKT_ENQUEUE 64 +#define PKT_DEQUEUE 32 +#define MAX_PKT_TX_BURST 64 + +#define RX_PTHRESH 8 /**< Default values of RX prefetch threshold reg. */ +#define RX_HTHRESH 8 /**< Default values of RX host threshold reg. */ +#define RX_WTHRESH 4 /**< Default values of RX write-back threshold reg. */ + +#define TX_PTHRESH 36 /**< Default values of TX prefetch threshold reg. */ +#define TX_HTHRESH 0 /**< Default values of TX host threshold reg. */ +#define TX_WTHRESH 0 /**< Default values of TX write-back threshold reg. */ + +#define BURST_TX_DRAIN_US 100 + +#define MAX_DATA_STREAMS (RTE_MAX_LCORE/2) +#define MAX_SCHED_SUBPORTS 8 +#define MAX_SCHED_PIPES 4096 + +#ifndef APP_COLLECT_STAT +#define APP_COLLECT_STAT 1 +#endif + +#if APP_COLLECT_STAT +#define APP_STATS_ADD(stat,val) (stat) += (val) +#else +#define APP_STATS_ADD(stat,val) do {(void) (val);} while (0) +#endif + +struct thread_stat +{ + uint64_t nb_rx; + uint64_t nb_drop; +}; + + +struct thread_conf +{ + uint32_t counter; + uint32_t n_mbufs; + struct rte_mbuf **m_table; + + uint8_t rx_port; + uint8_t tx_port; + uint16_t rx_queue; + uint16_t tx_queue; + struct rte_ring *rx_ring; + struct rte_ring *tx_ring; + struct rte_sched_port *sched_port; + +#if APP_COLLECT_STAT + struct thread_stat stat; +#endif +} __rte_cache_aligned; + + +struct flow_conf +{ + uint32_t rx_core; + uint32_t wt_core; + uint32_t tx_core; + uint8_t rx_port; + uint8_t tx_port; + uint16_t rx_queue; + uint16_t tx_queue; + struct rte_ring *rx_ring; + struct rte_ring *tx_ring; + struct rte_sched_port *sched_port; + struct rte_mempool *mbuf_pool; + + struct thread_conf rx_thread; + struct thread_conf wt_thread; + struct thread_conf tx_thread; +}; + + +struct ring_conf +{ + uint32_t rx_size; + uint32_t ring_size; + uint32_t tx_size; +}; + +struct burst_conf +{ + uint16_t rx_burst; + uint16_t ring_burst; + uint16_t qos_dequeue; + uint16_t tx_burst; +}; + +struct ring_thresh +{ + uint8_t pthresh; /**< Ring prefetch threshold. */ + uint8_t hthresh; /**< Ring host threshold. */ + uint8_t wthresh; /**< Ring writeback threshold. 
*/ +}; + +extern uint32_t nb_pfc; +extern const char *cfg_profile; +extern struct flow_conf qos_conf[]; +extern int app_pipe_to_profile[MAX_SCHED_SUBPORTS][MAX_SCHED_PIPES]; + +extern struct ring_conf ring_conf; +extern struct burst_conf burst_conf; +extern struct ring_thresh rx_thresh; +extern struct ring_thresh tx_thresh; + +extern struct rte_sched_port_params port_params; + +int MAIN(int argc, char **argv); +int app_parse_args(int argc, char **argv); +int app_init(void); + +void app_rx_thread(struct thread_conf **qconf); +void app_tx_thread(struct thread_conf **qconf); +void app_worker_thread(struct thread_conf **qconf); +void app_mixed_thread(struct thread_conf **qconf); + + +#ifdef __cplusplus +} +#endif + +#endif /* _MAIN_H_ */ diff --git a/examples/qos_sched/profile.cfg b/examples/qos_sched/profile.cfg new file mode 100644 index 0000000000..5caa996e3e --- /dev/null +++ b/examples/qos_sched/profile.cfg @@ -0,0 +1,109 @@ +; BSD LICENSE +; +; Copyright(c) 2010-2013 Intel Corporation. All rights reserved. +; All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +; + +; This file enables the following hierarchical scheduler configuration for each +; 10GbE output port: +; * Single subport (subport 0): +; - Subport rate set to 100% of port rate +; - Each of the 4 traffic classes has rate set to 100% of port rate +; * 4K pipes per subport 0 (pipes 0 .. 4095) with identical configuration: +; - Pipe rate set to 1/4K of port rate +; - Each of the 4 traffic classes has rate set to 100% of pipe rate +; - Within each traffic class, the byte-level WRR weights for the 4 queues +; are set to 1:1:1:1 +; +; For more details, please refer to chapter "Quality of Service (QoS) Framework" +; of Intel Data Plane Development Kit (Intel DPDK) Programmer's Guide. 
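+;
+; (Rate arithmetic, for reference: a 10 GbE port carries 10^10 bits/sec, i.e.
+; 10^10 / 8 = 1250000000 bytes/sec, which is the subport tb rate used below;
+; dividing it evenly across 4096 pipes gives 1250000000 / 4096 = ~305175
+; bytes/sec per pipe, the pipe profile tb rate.)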
+ +; Port configuration +[port] +frame overhead = 24 +number of subports per port = 1 +number of pipes per subport = 4096 +queue sizes = 64 64 64 64 + +; Subport configuration +[subport 0] +tb rate = 1250000000 ; Bytes per second +tb size = 1000000 ; Bytes + +tc 0 rate = 1250000000 ; Bytes per second +tc 1 rate = 1250000000 ; Bytes per second +tc 2 rate = 1250000000 ; Bytes per second +tc 3 rate = 1250000000 ; Bytes per second +tc period = 10 ; Milliseconds +tc oversubscription period = 10; Milliseconds + +pipe 0-4095 = 0 ; These pipes are configured with pipe profile 0 + +; Pipe configuration +[pipe profile 0] +tb rate = 305175 ; Bytes per second +tb size = 1000000 ; Bytes + +tc 0 rate = 305175 ; Bytes per second +tc 1 rate = 305175 ; Bytes per second +tc 2 rate = 305175 ; Bytes per second +tc 3 rate = 305175 ; Bytes per second +tc period = 40 ; Milliseconds + +tc 0 oversubscription weight = 1 +tc 1 oversubscription weight = 1 +tc 2 oversubscription weight = 1 +tc 3 oversubscription weight = 1 + +tc 0 wrr weights = 1 1 1 1 +tc 1 wrr weights = 1 1 1 1 +tc 2 wrr weights = 1 1 1 1 +tc 3 wrr weights = 1 1 1 1 + +; RED params per traffic class and color (Green / Yellow / Red) +[red] +tc 0 wred min = 48 40 32 +tc 0 wred max = 64 64 64 +tc 0 wred inv prob = 10 10 10 +tc 0 wred weight = 9 9 9 + +tc 1 wred min = 48 40 32 +tc 1 wred max = 64 64 64 +tc 1 wred inv prob = 10 10 10 +tc 1 wred weight = 9 9 9 + +tc 2 wred min = 48 40 32 +tc 2 wred max = 64 64 64 +tc 2 wred inv prob = 10 10 10 +tc 2 wred weight = 9 9 9 + +tc 3 wred min = 48 40 32 +tc 3 wred max = 64 64 64 +tc 3 wred inv prob = 10 10 10 +tc 3 wred weight = 9 9 9 diff --git a/lib/Makefile b/lib/Makefile index 122ba42f29..74162f2c55 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -48,6 +48,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_LPM) += librte_lpm DIRS-$(CONFIG_RTE_LIBRTE_NET) += librte_net DIRS-$(CONFIG_RTE_LIBRTE_POWER) += librte_power DIRS-$(CONFIG_RTE_LIBRTE_METER) += librte_meter +DIRS-$(CONFIG_RTE_LIBRTE_SCHED) += librte_sched DIRS-$(CONFIG_RTE_LIBRTE_PMAC) += librte_pmac ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y) diff --git a/lib/librte_eal/common/include/rte_log.h b/lib/librte_eal/common/include/rte_log.h index b0c735f171..bf4a3e2c76 100644 --- a/lib/librte_eal/common/include/rte_log.h +++ b/lib/librte_eal/common/include/rte_log.h @@ -74,6 +74,7 @@ extern struct rte_logs rte_logs; #define RTE_LOGTYPE_PMAC 0x00000200 /**< Log related to PMAC. */ #define RTE_LOGTYPE_POWER 0x00000400 /**< Log related to power. */ #define RTE_LOGTYPE_METER 0x00000800 /**< Log related to QoS meter. */ +#define RTE_LOGTYPE_SCHED 0x00001000 /**< Log related to QoS port scheduler. */ /* these log types can be used in an application */ #define RTE_LOGTYPE_USER1 0x01000000 /**< User-defined log type 1. */ diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h index 27988c3bf4..5d610cb3ea 100644 --- a/lib/librte_mbuf/rte_mbuf.h +++ b/lib/librte_mbuf/rte_mbuf.h @@ -158,6 +158,7 @@ struct rte_pktmbuf { uint16_t hash; uint16_t id; } fdir; /**< Filter identifier if FDIR enabled */ + uint32_t sched; /**< Hierarchical scheduler */ } hash; /**< hash information */ }; diff --git a/lib/librte_sched/Makefile b/lib/librte_sched/Makefile new file mode 100644 index 0000000000..5050db0e13 --- /dev/null +++ b/lib/librte_sched/Makefile @@ -0,0 +1,56 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2013 Intel Corporation. All rights reserved. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +include $(RTE_SDK)/mk/rte.vars.mk + +# +# library name +# +LIB = librte_sched.a + +CFLAGS += -O3 +CFLAGS += -g +CFLAGS += $(WERROR_FLAGS) + +# +# all source are stored in SRCS-y +# +SRCS-$(CONFIG_RTE_LIBRTE_SCHED) += rte_sched.c rte_red.c rte_approx.c + +# install includes +SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include := rte_sched.h rte_bitmap.h rte_sched_common.h rte_red.h rte_approx.h + +# this lib depends upon: +DEPDIRS-$(CONFIG_RTE_LIBRTE_SCHED) += lib/librte_mempool lib/librte_mbuf +DEPDIRS-$(CONFIG_RTE_LIBRTE_SCHED) += lib/librte_net lib/librte_timer + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_sched/rte_approx.c b/lib/librte_sched/rte_approx.c new file mode 100644 index 0000000000..c05e2a7826 --- /dev/null +++ b/lib/librte_sched/rte_approx.c @@ -0,0 +1,197 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2013 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include + +#include "rte_approx.h" + +/* + * Based on paper "Approximating Rational Numbers by Fractions" by Michal + * Forisek forisek@dcs.fmph.uniba.sk + * + * Given a rational number alpha with 0 < alpha < 1 and a precision d, the goal + * is to find positive integers p, q such that alpha - d < p/q < alpha + d, and + * q is minimal. + * + * http://people.ksp.sk/~misof/publications/2007approx.pdf + */ + +/* fraction comparison: compare (a/b) and (c/d) */ +static inline uint32_t +less(uint32_t a, uint32_t b, uint32_t c, uint32_t d) +{ + return (a*d < b*c); +} + +static inline uint32_t +less_or_equal(uint32_t a, uint32_t b, uint32_t c, uint32_t d) +{ + return (a*d <= b*c); +} + +/* check whether a/b is a valid approximation */ +static inline uint32_t +matches(uint32_t a, uint32_t b, + uint32_t alpha_num, uint32_t d_num, uint32_t denum) +{ + if (less_or_equal(a, b, alpha_num - d_num, denum)) + return 0; + + if (less(a ,b, alpha_num + d_num, denum)) + return 1; + + return 0; +} + +static inline void +find_exact_solution_left(uint32_t p_a, uint32_t q_a, uint32_t p_b, uint32_t q_b, + uint32_t alpha_num, uint32_t d_num, uint32_t denum, uint32_t *p, uint32_t *q) +{ + uint32_t k_num = denum * p_b - (alpha_num + d_num) * q_b; + uint32_t k_denum = (alpha_num + d_num) * q_a - denum * p_a; + uint32_t k = (k_num / k_denum) + 1; + + *p = p_b + k * p_a; + *q = q_b + k * q_a; +} + +static inline void +find_exact_solution_right(uint32_t p_a, uint32_t q_a, uint32_t p_b, uint32_t q_b, + uint32_t alpha_num, uint32_t d_num, uint32_t denum, uint32_t *p, uint32_t *q) +{ + uint32_t k_num = - denum * p_b + (alpha_num - d_num) * q_b; + uint32_t k_denum = - (alpha_num - d_num) * q_a + denum * p_a; + uint32_t k = (k_num / k_denum) + 1; + + *p = p_b + k * p_a; + *q = q_b + k * q_a; +} + +static int +find_best_rational_approximation(uint32_t alpha_num, uint32_t d_num, uint32_t denum, uint32_t *p, uint32_t *q) +{ + uint32_t p_a, q_a, p_b, q_b; + + /* check assumptions on the inputs */ + if (!((0 < d_num) && (d_num < alpha_num) && (alpha_num < denum) && (d_num + alpha_num < denum))) { + return -1; + } + + /* set initial bounds for the search */ + p_a = 0; + q_a = 1; + p_b = 1; + q_b = 1; + + while (1) { + uint32_t new_p_a, new_q_a, new_p_b, new_q_b; + uint32_t x_num, x_denum, x; + int aa, bb; + + /* compute the number of steps to the left */ + x_num = denum * p_b - alpha_num * q_b; + x_denum = - denum * p_a + alpha_num * q_a; + x = (x_num + x_denum - 1) / x_denum; /* x = ceil(x_num / x_denum) */ + + /* check whether we have a valid approximation */ + aa = matches(p_b + x * p_a, q_b + x * q_a, alpha_num, d_num, denum); + bb = matches(p_b + (x-1) * p_a, q_b + (x - 1) * q_a, alpha_num, d_num, denum); + if (aa || bb) { + find_exact_solution_left(p_a, q_a, p_b, q_b, alpha_num, d_num, denum, p, q); + return 0; + } + + /* update the interval */ + new_p_a = p_b + (x - 1) * p_a ; + new_q_a = q_b + (x - 1) * q_a; + new_p_b = p_b + x * p_a ; + new_q_b = q_b + x * q_a; + + p_a = new_p_a 
; + q_a = new_q_a; + p_b = new_p_b ; + q_b = new_q_b; + + /* compute the number of steps to the right */ + x_num = alpha_num * q_b - denum * p_b; + x_denum = - alpha_num * q_a + denum * p_a; + x = (x_num + x_denum - 1) / x_denum; /* x = ceil(x_num / x_denum) */ + + /* check whether we have a valid approximation */ + aa = matches(p_b + x * p_a, q_b + x * q_a, alpha_num, d_num, denum); + bb = matches(p_b + (x - 1) * p_a, q_b + (x - 1) * q_a, alpha_num, d_num, denum); + if (aa || bb) { + find_exact_solution_right(p_a, q_a, p_b, q_b, alpha_num, d_num, denum, p, q); + return 0; + } + + /* update the interval */ + new_p_a = p_b + (x - 1) * p_a; + new_q_a = q_b + (x - 1) * q_a; + new_p_b = p_b + x * p_a; + new_q_b = q_b + x * q_a; + + p_a = new_p_a; + q_a = new_q_a; + p_b = new_p_b; + q_b = new_q_b; + } +} + +int rte_approx(double alpha, double d, uint32_t *p, uint32_t *q) +{ + uint32_t alpha_num, d_num, denum; + + /* Check input arguments */ + if (!((0.0 < d) && (d < alpha) && (alpha < 1.0))) { + return -1; + } + + if ((p == NULL) || (q == NULL)) { + return -2; + } + + /* Compute alpha_num, d_num and denum */ + denum = 1; + while (d < 1) { + alpha *= 10; + d *= 10; + denum *= 10; + } + alpha_num = (uint32_t) alpha; + d_num = (uint32_t) d; + + /* Perform approximation */ + return find_best_rational_approximation(alpha_num, d_num, denum, p, q); +} diff --git a/lib/librte_sched/rte_approx.h b/lib/librte_sched/rte_approx.h new file mode 100644 index 0000000000..d755afa60e --- /dev/null +++ b/lib/librte_sched/rte_approx.h @@ -0,0 +1,76 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2013 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef __INCLUDE_RTE_APPROX_H__ +#define __INCLUDE_RTE_APPROX_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @file + * RTE Rational Approximation + * + * Given a rational number alpha with 0 < alpha < 1 and a precision d, the goal + * is to find positive integers p, q such that alpha - d < p/q < alpha + d, and + * q is minimal. 
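+ *
+ * Example: alpha = 0.33 with precision d = 0.1 accepts any fraction inside
+ * (0.23, 0.43); the smallest valid denominator yields p = 1, q = 3
+ * (1/3 = 0.333...).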
+ * + ***/ + +#include + +/** + * Find best rational approximation + * + * @param alpha + * Rational number to approximate + * @param d + * Precision for the rational approximation + * @param p + * Pointer to pre-allocated space where the numerator of the rational + * approximation will be stored when operation is successful + * @param q + * Pointer to pre-allocated space where the denominator of the rational + * approximation will be stored when operation is successful + * @return + * 0 upon success, error code otherwise + */ +int rte_approx(double alpha, double d, uint32_t *p, uint32_t *q); + +#ifdef __cplusplus +} +#endif + +#endif /* __INCLUDE_RTE_APPROX_H__ */ diff --git a/lib/librte_sched/rte_bitmap.h b/lib/librte_sched/rte_bitmap.h new file mode 100644 index 0000000000..c52db32a78 --- /dev/null +++ b/lib/librte_sched/rte_bitmap.h @@ -0,0 +1,505 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2013 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef __INCLUDE_RTE_BITMAP_H__ +#define __INCLUDE_RTE_BITMAP_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @file + * RTE Bitmap + * + * The bitmap component provides a mechanism to manage large arrays of bits + * through bit get/set/clear and bit array scan operations. + * + * The bitmap scan operation is optimized for 64-bit CPUs using 64-byte cache + * lines. The bitmap is hierarchically organized using two arrays (array1 and + * array2), with each bit in array1 being associated with a full cache line + * (512 bits) of bitmap bits, which are stored in array2: the bit in array1 is + * set only when there is at least one bit set within its associated array2 + * bits, otherwise the bit in array1 is cleared. The read and write operations + * for array1 and array2 are always done in slabs of 64 bits. + * + * This bitmap is not thread safe. 
For lock free operation on a specific bitmap + * instance, a single writer thread performing bit set/clear operations is + * allowed, only the writer thread can do bitmap scan operations, while there + * can be several reader threads performing bit get operations in parallel with + * the writer thread. When the use of locking primitives is acceptable, the + * serialization of the bit set/clear and bitmap scan operations needs to be + * enforced by the caller, while the bit get operation does not require locking + * the bitmap. + * + ***/ + +#include +#include +#include +#include + +#ifndef RTE_BITMAP_OPTIMIZATIONS +#define RTE_BITMAP_OPTIMIZATIONS 1 +#endif +#if RTE_BITMAP_OPTIMIZATIONS +#include +#endif + +/** Number of elements in array1. Each element in array1 is a 64-bit slab. */ +#ifndef RTE_BITMAP_ARRAY1_SIZE +#define RTE_BITMAP_ARRAY1_SIZE 16 +#endif + +/* Slab */ +#define RTE_BITMAP_SLAB_BIT_SIZE 64 +#define RTE_BITMAP_SLAB_BIT_SIZE_LOG2 6 +#define RTE_BITMAP_SLAB_BIT_MASK (RTE_BITMAP_SLAB_BIT_SIZE - 1) + +/* Cache line (CL) */ +#define RTE_BITMAP_CL_BIT_SIZE (CACHE_LINE_SIZE * 8) +#define RTE_BITMAP_CL_BIT_SIZE_LOG2 9 +#define RTE_BITMAP_CL_BIT_MASK (RTE_BITMAP_CL_BIT_SIZE - 1) + +#define RTE_BITMAP_CL_SLAB_SIZE (RTE_BITMAP_CL_BIT_SIZE / RTE_BITMAP_SLAB_BIT_SIZE) +#define RTE_BITMAP_CL_SLAB_SIZE_LOG2 3 +#define RTE_BITMAP_CL_SLAB_MASK (RTE_BITMAP_CL_SLAB_SIZE - 1) + +/** Bitmap data structure */ +struct rte_bitmap { + uint64_t array1[RTE_BITMAP_ARRAY1_SIZE]; /**< Bitmap array1 */ + uint64_t *array2; /**< Bitmap array2 */ + uint32_t array1_size; /**< Number of 64-bit slabs in array1 that are actually used */ + uint32_t array2_size; /**< Number of 64-bit slabs in array2 */ + + /* Context for the "scan next" operation */ + uint32_t index1; /**< Bitmap scan: Index of current array1 slab */ + uint32_t offset1; /**< Bitmap scan: Offset of current bit within current array1 slab */ + uint32_t index2; /**< Bitmap scan: Index of current array2 slab */ + uint32_t go2; /**< Bitmap scan: Go/stop condition for current array2 cache line */ +} __rte_cache_aligned; + +static inline void +__rte_bitmap_index1_inc(struct rte_bitmap *bmp) +{ + bmp->index1 = (bmp->index1 + 1) & (RTE_BITMAP_ARRAY1_SIZE - 1); +} + +static inline uint64_t +__rte_bitmap_mask1_get(struct rte_bitmap *bmp) +{ + return ((~1lu) << bmp->offset1); +} + +static inline void +__rte_bitmap_index2_set(struct rte_bitmap *bmp) +{ + bmp->index2 = (((bmp->index1 << RTE_BITMAP_SLAB_BIT_SIZE_LOG2) + bmp->offset1) << RTE_BITMAP_CL_SLAB_SIZE_LOG2); +} + +#if RTE_BITMAP_OPTIMIZATIONS + +static inline int +rte_bsf64(uint64_t slab, uint32_t *pos) +{ + if (likely(slab == 0)) { + return 0; + } + + *pos = __builtin_ctzll(slab); + return 1; +} + +#else + +static inline int +rte_bsf64(uint64_t slab, uint32_t *pos) +{ + uint64_t mask; + uint32_t i; + + if (likely(slab == 0)) { + return 0; + } + + for (i = 0, mask = 1; i < RTE_BITMAP_SLAB_BIT_SIZE; i ++, mask <<= 1) { + if (unlikely(slab & mask)) { + *pos = i; + return 1; + } + } + + return 0; +} + +#endif + +static inline void +__rte_bitmap_scan_init(struct rte_bitmap *bmp) +{ + bmp->index1 = RTE_BITMAP_ARRAY1_SIZE - 1; + bmp->offset1 = RTE_BITMAP_SLAB_BIT_SIZE - 1; + __rte_bitmap_index2_set(bmp); + bmp->index2 += RTE_BITMAP_CL_SLAB_SIZE; + + bmp->go2 = 0; +} + +/** + * Bitmap initialization + * + * @param bmp + * Handle to bitmap instance + * @param array2 + * Base address of pre-allocated array2 + * @param n_bits + * Number of pre-allocated bits in array2. Must be non-zero and multiple of 512. 
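+ *   (Sizing note: array2 occupies n_bits / 8 bytes and must be cache line
+ *   aligned; with the default RTE_BITMAP_ARRAY1_SIZE of 16, at most
+ *   16 * 64 * 512 = 524288 bits can be managed.)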
+ * @return + * 0 upon success, error code otherwise + */ +static inline int +rte_bitmap_init(struct rte_bitmap *bmp, uint8_t *array2, uint32_t n_bits) +{ + uint32_t array1_size, array2_size; + + /* Check input arguments */ + if ((bmp == NULL) || + (array2 == NULL) || (((uintptr_t) array2) & CACHE_LINE_MASK) || + (n_bits == 0) || (n_bits & RTE_BITMAP_CL_BIT_MASK)){ + return -1; + } + + array2_size = n_bits / RTE_BITMAP_SLAB_BIT_SIZE; + array1_size = ((n_bits / RTE_BITMAP_CL_BIT_SIZE) + (RTE_BITMAP_SLAB_BIT_SIZE - 1)) / RTE_BITMAP_SLAB_BIT_SIZE; + if (array1_size > RTE_BITMAP_ARRAY1_SIZE){ + return -1; + } + + /* Setup bitmap */ + memset(bmp, 0, sizeof(struct rte_bitmap)); + bmp->array2 = (uint64_t *) array2; + bmp->array1_size = array1_size; + bmp->array2_size = array2_size; + __rte_bitmap_scan_init(bmp); + + return 0; +} + +/** + * Bitmap free + * + * @param bmp + * Handle to bitmap instance + * @return + * 0 upon success, error code otherwise + */ +static inline int +rte_bitmap_free(struct rte_bitmap *bmp) +{ + /* Check input arguments */ + if (bmp == NULL) { + return -1; + } + + return 0; +} + +/** + * Bitmap reset + * + * @param bmp + * Handle to bitmap instance + */ +static inline void +rte_bitmap_reset(struct rte_bitmap *bmp) +{ + memset(bmp->array1, 0, sizeof(bmp->array1)); + memset(bmp->array2, 0, bmp->array2_size * sizeof(uint64_t)); + __rte_bitmap_scan_init(bmp); +} + +/** + * Bitmap location prefetch into CPU L1 cache + * + * @param bmp + * Handle to bitmap instance + * @param pos + * Bit position + * @return + * 0 upon success, error code otherwise + */ +static inline void +rte_bitmap_prefetch0(struct rte_bitmap *bmp, uint32_t pos) +{ + uint64_t *slab2; + uint32_t index2; + + index2 = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2; + slab2 = bmp->array2 + index2; + rte_prefetch0((void *) slab2); +} + +/** + * Bitmap bit get + * + * @param bmp + * Handle to bitmap instance + * @param pos + * Bit position + * @return + * 0 when bit is cleared, non-zero when bit is set + */ +static inline uint64_t +rte_bitmap_get(struct rte_bitmap *bmp, uint32_t pos) +{ + uint64_t *slab2; + uint32_t index2, offset2; + + index2 = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2; + offset2 = pos & RTE_BITMAP_SLAB_BIT_MASK; + slab2 = bmp->array2 + index2; + return ((*slab2) & (1lu << offset2)); +} + +/** + * Bitmap bit set + * + * @param bmp + * Handle to bitmap instance + * @param pos + * Bit position + */ +static inline void +rte_bitmap_set(struct rte_bitmap *bmp, uint32_t pos) +{ + uint64_t *slab1, *slab2; + uint32_t index1, index2, offset1, offset2; + + /* Set bit in array2 slab and set bit in array1 slab */ + index2 = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2; + offset2 = pos & RTE_BITMAP_SLAB_BIT_MASK; + index1 = pos >> (RTE_BITMAP_SLAB_BIT_SIZE_LOG2 + RTE_BITMAP_CL_BIT_SIZE_LOG2); + offset1 = (pos >> RTE_BITMAP_CL_BIT_SIZE_LOG2) & RTE_BITMAP_SLAB_BIT_MASK; + slab2 = bmp->array2 + index2; + slab1 = bmp->array1 + index1; + + *slab2 |= 1lu << offset2; + *slab1 |= 1lu << offset1; +} + +/** + * Bitmap slab set + * + * @param bmp + * Handle to bitmap instance + * @param pos + * Bit position identifying the array2 slab + * @param slab + * Value to be assigned to the 64-bit slab in array2 + */ +static inline void +rte_bitmap_set_slab(struct rte_bitmap *bmp, uint32_t pos, uint64_t slab) +{ + uint64_t *slab1, *slab2; + uint32_t index1, index2, offset1; + + /* Set bits in array2 slab and set bit in array1 slab */ + index2 = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2; + index1 = pos >> (RTE_BITMAP_SLAB_BIT_SIZE_LOG2 + 
RTE_BITMAP_CL_BIT_SIZE_LOG2); + offset1 = (pos >> RTE_BITMAP_CL_BIT_SIZE_LOG2) & RTE_BITMAP_SLAB_BIT_MASK; + slab2 = bmp->array2 + index2; + slab1 = bmp->array1 + index1; + + *slab2 |= slab; + *slab1 |= 1lu << offset1; +} + +static inline uint64_t +__rte_bitmap_line_not_empty(uint64_t *slab2) +{ + uint64_t v1, v2, v3, v4; + + v1 = slab2[0] | slab2[1]; + v2 = slab2[2] | slab2[3]; + v3 = slab2[4] | slab2[5]; + v4 = slab2[6] | slab2[7]; + v1 |= v2; + v3 |= v4; + + return (v1 | v3); +} + +/** + * Bitmap bit clear + * + * @param bmp + * Handle to bitmap instance + * @param pos + * Bit position + */ +static inline void +rte_bitmap_clear(struct rte_bitmap *bmp, uint32_t pos) +{ + uint64_t *slab1, *slab2; + uint32_t index1, index2, offset1, offset2; + + /* Clear bit in array2 slab */ + index2 = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2; + offset2 = pos & RTE_BITMAP_SLAB_BIT_MASK; + slab2 = bmp->array2 + index2; + + /* Return if array2 slab is not all-zeros */ + *slab2 &= ~(1lu << offset2); + if (*slab2){ + return; + } + + /* Check the entire cache line of array2 for all-zeros */ + index2 &= ~ RTE_BITMAP_CL_SLAB_MASK; + slab2 = bmp->array2 + index2; + if (__rte_bitmap_line_not_empty(slab2)) { + return; + } + + /* The array2 cache line is all-zeros, so clear bit in array1 slab */ + index1 = pos >> (RTE_BITMAP_SLAB_BIT_SIZE_LOG2 + RTE_BITMAP_CL_BIT_SIZE_LOG2); + offset1 = (pos >> RTE_BITMAP_CL_BIT_SIZE_LOG2) & RTE_BITMAP_SLAB_BIT_MASK; + slab1 = bmp->array1 + index1; + *slab1 &= ~(1lu << offset1); + + return; +} + +static inline int +__rte_bitmap_scan_search(struct rte_bitmap *bmp) +{ + uint64_t value1; + uint32_t i; + + /* Check current array1 slab */ + value1 = bmp->array1[bmp->index1]; + value1 &= __rte_bitmap_mask1_get(bmp); + + if (rte_bsf64(value1, &bmp->offset1)) { + return 1; + } + + __rte_bitmap_index1_inc(bmp); + bmp->offset1 = 0; + + /* Look for another array1 slab */ + for (i = 0; i < RTE_BITMAP_ARRAY1_SIZE; i ++, __rte_bitmap_index1_inc(bmp)) { + value1 = bmp->array1[bmp->index1]; + + if (rte_bsf64(value1, &bmp->offset1)) { + return 1; + } + } + + return 0; +} + +static inline void +__rte_bitmap_scan_read_init(struct rte_bitmap *bmp) +{ + __rte_bitmap_index2_set(bmp); + bmp->go2 = 1; + rte_prefetch1((void *)(bmp->array2 + bmp->index2 + 8)); +} + +static inline int +__rte_bitmap_scan_read(struct rte_bitmap *bmp, uint32_t *pos, uint64_t *slab) +{ + uint64_t *slab2; + + slab2 = bmp->array2 + bmp->index2; + for ( ; bmp->go2 ; bmp->index2 ++, slab2 ++, bmp->go2 = bmp->index2 & RTE_BITMAP_CL_SLAB_MASK) { + if (*slab2) { + *pos = bmp->index2 << RTE_BITMAP_SLAB_BIT_SIZE_LOG2; + *slab = *slab2; + + bmp->index2 ++; + slab2 ++; + bmp->go2 = bmp->index2 & RTE_BITMAP_CL_SLAB_MASK; + return 1; + } + } + + return 0; +} + +/** + * Bitmap scan (with automatic wrap-around) + * + * @param bmp + * Handle to bitmap instance + * @param pos + * When function call returns 1, pos contains the position of the next set + * bit, otherwise not modified + * @param slab + * When function call returns 1, slab contains the value of the entire 64-bit + * slab where the bit indicated by pos is located. Slabs are always 64-bit + * aligned, so the position of the first bit of the slab (this bit is not + * necessarily set) is pos / 64. Once a slab has been returned by the bitmap + * scan operation, the internal pointers of the bitmap are updated to point + * after this slab, so the same slab will not be returned again if it + * contains more than one bit which is set. When function call returns 0, + * slab is not modified. 
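+ *   A minimal consumer sketch (handle_bit() is a hypothetical callback):
+ *   while (rte_bitmap_scan(bmp, &pos, &slab)) { uint32_t i; while
+ *   (rte_bsf64(slab, &i)) { handle_bit(pos + i); slab &= slab - 1; } }
+ *   Since the scan wraps around, the caller must supply its own
+ *   termination condition.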
+ * @return + * 0 if there is no bit set in the bitmap, 1 otherwise + */ +static inline int +rte_bitmap_scan(struct rte_bitmap *bmp, uint32_t *pos, uint64_t *slab) +{ + /* Return data from current array2 line if available */ + if (__rte_bitmap_scan_read(bmp, pos, slab)) { + return 1; + } + + /* Look for non-empty array2 line */ + if (__rte_bitmap_scan_search(bmp)) { + __rte_bitmap_scan_read_init(bmp); + __rte_bitmap_scan_read(bmp, pos, slab); + return 1; + } + + /* Empty bitmap */ + return 0; +} + +#ifdef __cplusplus +} +#endif + +#endif /* __INCLUDE_RTE_BITMAP_H__ */ diff --git a/lib/librte_sched/rte_red.c b/lib/librte_sched/rte_red.c new file mode 100644 index 0000000000..0eaf5a03ab --- /dev/null +++ b/lib/librte_sched/rte_red.c @@ -0,0 +1,160 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2013 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include +#include "rte_red.h" +#include + +#ifdef __INTEL_COMPILER +#pragma warning(disable:2259) /* conversion may lose significant bits */ +#endif + +#define DIM(x) (sizeof(x)/sizeof(x[0])) + +static int rte_red_init_done = 0; /**< Flag to indicate that global initialisation is done */ +uint32_t rte_red_rand_val = 0; /**< Random value cache */ +uint32_t rte_red_rand_seed = 0; /**< Seed for random number generation */ + +/** + * table[i] = log2(1-Wq) * Scale * -1 + * Wq = 1/(2^i) + */ +uint16_t rte_red_log2_1_minus_Wq[RTE_RED_WQ_LOG2_NUM]; + +/** + * table[i] = 2^(i/16) * Scale + */ +uint16_t rte_red_pow2_frac_inv[16]; + +/** + * @brief Initialize tables used to compute average + * queue size when queue is empty. 
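+ *
+ * (Precisely, as computed below: rte_red_pow2_frac_inv[i] stores
+ * round(2^RTE_RED_SCALING / 2^(i/16)), and rte_red_log2_1_minus_Wq[j]
+ * stores round(-1024 * log2(1 - 2^-(j + RTE_RED_WQ_LOG2_MIN))).)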
+ */ +static void +__rte_red_init_tables(void) +{ + uint32_t i = 0; + double scale = 0.0; + double table_size = 0.0; + + scale = (double)(1 << RTE_RED_SCALING); + table_size = (double)(DIM(rte_red_pow2_frac_inv)); + + for (i = 0; i < DIM(rte_red_pow2_frac_inv); i++) { + double m = (double)i; + + rte_red_pow2_frac_inv[i] = (uint16_t) round(scale / pow(2, m / table_size)); + } + + scale = 1024.0; + + RTE_RED_ASSERT(RTE_RED_WQ_LOG2_NUM == DIM(rte_red_log2_1_minus_Wq)); + + for (i = RTE_RED_WQ_LOG2_MIN; i <= RTE_RED_WQ_LOG2_MAX; i++) { + double n = (double)i; + double Wq = pow(2, -n); + uint32_t index = i - RTE_RED_WQ_LOG2_MIN; + + rte_red_log2_1_minus_Wq[index] = (uint16_t) round(-1.0 * scale * log2(1.0 - Wq)); + /** + * Table entry of zero, corresponds to a Wq of zero + * which is not valid (avg would remain constant no + * matter how long the queue is empty). So we have + * to check for zero and round up to one. + */ + if (rte_red_log2_1_minus_Wq[index] == 0) { + rte_red_log2_1_minus_Wq[index] = 1; + } + } +} + +int +rte_red_rt_data_init(struct rte_red *red) +{ + if (red == NULL) + return -1; + + red->avg = 0; + red->count = 0; + red->q_time = 0; + return 0; +} + +int +rte_red_config_init(struct rte_red_config *red_cfg, + const uint16_t wq_log2, + const uint16_t min_th, + const uint16_t max_th, + const uint16_t maxp_inv) +{ + if (red_cfg == NULL) { + return -1; + } + if (max_th > RTE_RED_MAX_TH_MAX) { + return -2; + } + if (min_th >= max_th) { + return -3; + } + if (wq_log2 > RTE_RED_WQ_LOG2_MAX) { + return -4; + } + if (wq_log2 < RTE_RED_WQ_LOG2_MIN) { + return -5; + } + if (maxp_inv < RTE_RED_MAXP_INV_MIN) { + return -6; + } + if (maxp_inv > RTE_RED_MAXP_INV_MAX) { + return -7; + } + + /** + * Initialize the RED module if not already done + */ + if (!rte_red_init_done) { + rte_red_rand_seed = rte_rand(); + rte_red_rand_val = rte_fast_rand(); + __rte_red_init_tables(); + rte_red_init_done = 1; + } + + red_cfg->min_th = ((uint32_t) min_th) << (wq_log2 + RTE_RED_SCALING); + red_cfg->max_th = ((uint32_t) max_th) << (wq_log2 + RTE_RED_SCALING); + red_cfg->pa_const = (2 * (max_th - min_th) * maxp_inv) << RTE_RED_SCALING; + red_cfg->maxp_inv = maxp_inv; + red_cfg->wq_log2 = wq_log2; + + return 0; +} diff --git a/lib/librte_sched/rte_red.h b/lib/librte_sched/rte_red.h new file mode 100644 index 0000000000..debe556f4c --- /dev/null +++ b/lib/librte_sched/rte_red.h @@ -0,0 +1,454 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2013 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef __RTE_RED_H_INCLUDED__ +#define __RTE_RED_H_INCLUDED__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @file + * RTE Random Early Detection (RED) + * + * + ***/ + +#include +#include +#include +#include +#include +#include + +#define RTE_RED_SCALING 10 /**< Fraction size for fixed-point */ +#define RTE_RED_S (1 << 22) /**< Packet size multiplied by number of leaf queues */ +#define RTE_RED_MAX_TH_MAX 1023 /**< Max threshold limit in fixed point format */ +#define RTE_RED_WQ_LOG2_MIN 1 /**< Min inverse filter weight value */ +#define RTE_RED_WQ_LOG2_MAX 12 /**< Max inverse filter weight value */ +#define RTE_RED_MAXP_INV_MIN 1 /**< Min inverse mark probability value */ +#define RTE_RED_MAXP_INV_MAX 255 /**< Max inverse mark probability value */ +#define RTE_RED_2POW16 (1<<16) /**< 2 power 16 */ +#define RTE_RED_INT16_NBITS (sizeof(uint16_t) * CHAR_BIT) +#define RTE_RED_WQ_LOG2_NUM (RTE_RED_WQ_LOG2_MAX - RTE_RED_WQ_LOG2_MIN + 1) + +#ifdef RTE_RED_DEBUG + +#define RTE_RED_ASSERT(exp) \ +if (!(exp)) { \ + rte_panic("line%d\tassert \"" #exp "\" failed\n", __LINE__); \ +} + +#else + +#define RTE_RED_ASSERT(exp) do { } while(0) + +#endif /* RTE_RED_DEBUG */ + +/** + * Externs + * + */ +extern uint32_t rte_red_rand_val; +extern uint32_t rte_red_rand_seed; +extern uint16_t rte_red_log2_1_minus_Wq[RTE_RED_WQ_LOG2_NUM]; +extern uint16_t rte_red_pow2_frac_inv[16]; + +/** + * RED configuration parameters passed by user + * + */ +struct rte_red_params { + uint16_t min_th; /**< Minimum threshold for queue (max_th) */ + uint16_t max_th; /**< Maximum threshold for queue (max_th) */ + uint16_t maxp_inv; /**< Inverse of packet marking probability maximum value (maxp = 1 / maxp_inv) */ + uint16_t wq_log2; /**< Negated log2 of queue weight (wq = 1 / (2 ^ wq_log2)) */ +}; + +/** + * RED configuration parameters + */ +struct rte_red_config { + uint32_t min_th; /**< min_th scaled in fixed-point format */ + uint32_t max_th; /**< max_th scaled in fixed-point format */ + uint32_t pa_const; /**< Precomputed constant value used for pa calculation (scaled in fixed-point format) */ + uint8_t maxp_inv; /**< maxp_inv */ + uint8_t wq_log2; /**< wq_log2 */ +}; + +/** + * RED run-time data + */ +struct rte_red { + uint32_t avg; /**< Average queue size (avg), scaled in fixed-point format */ + uint32_t count; /**< Number of packets since last marked packet (count) */ + uint64_t q_time; /**< Start of the queue idle time (q_time) */ +}; + +/** + * @brief Initialises run-time data + * + * @param [in,out] data pointer to RED runtime data + * + * @return Operation status + * @retval 0 success + * @retval !0 error + */ +int +rte_red_rt_data_init(struct rte_red *red); + +/** + * @brief Configures a single RED configuration parameter structure. 
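+ *
+ * (For instance, the qos_sched sample profile configures each traffic class
+ * with wq_log2 = 9, max_th = 64, maxp_inv = 10, and min_th = 48/40/32 for
+ * the green/yellow/red colors.)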
+ * + * @param [in,out] config pointer to a RED configuration parameter structure + * @param [in] wq_log2 log2 of the filter weight, valid range is: + * RTE_RED_WQ_LOG2_MIN <= wq_log2 <= RTE_RED_WQ_LOG2_MAX + * @param [in] min_th queue minimum threshold in number of packets + * @param [in] max_th queue maximum threshold in number of packets + * @param [in] maxp_inv inverse maximum mark probability + * + * @return Operation status + * @retval 0 success + * @retval !0 error + */ +int +rte_red_config_init(struct rte_red_config *red_cfg, + const uint16_t wq_log2, + const uint16_t min_th, + const uint16_t max_th, + const uint16_t maxp_inv); + +/** + * @brief Generate random number for RED + * + * Implementation based on: + * http://software.intel.com/en-us/articles/fast-random-number-generator-on-the-intel-pentiumr-4-processor/ + * + * 10 bit shift has been found through empirical tests (was 16). + * + * @return Random number between 0 and (2^22 - 1) + */ +static inline uint32_t +rte_fast_rand(void) +{ + rte_red_rand_seed = (214013 * rte_red_rand_seed) + 2531011; + return (rte_red_rand_seed >> 10); +} + +/** + * @brief Calculate factor to scale average queue size when queue + * becomes empty + * + * @param [in] wq_log2 log2 of EWMA filter weight, where wq = 1/(2 ^ wq_log2) + * @param [in] m exponent in the computed value (1 - wq) ^ m + * + * @return computed value + * @retval ((1 - wq) ^ m) scaled in fixed-point format + */ +static inline uint16_t +__rte_red_calc_qempty_factor(uint8_t wq_log2, uint16_t m) +{ + uint32_t n = 0; + uint32_t f = 0; + + /** + * Basic math tells us that: + * a^b = 2^(b * log2(a)) + * + * in our case: + * a = (1 - Wq) + * b = m + * Wq = 1/(2^wq_log2) + * + * So we are computing this equation: + * factor = 2 ^ (m * log2(1 - Wq)) + * + * First we are computing: + * n = m * log2(1 - Wq) + * + * To avoid dealing with signed numbers, the log2 table values are stored + * as positive although they should be negative because (1 - Wq) is + * always < 1. Contents of log2 table values are also scaled for precision. + */ + + n = m * rte_red_log2_1_minus_Wq[wq_log2 - RTE_RED_WQ_LOG2_MIN]; + + /** + * The tricky part is computing 2^n; for this we split n into + * an integer part and a fraction part. + * f - is the fraction part of n + * n - is the integer part of the original n + * + * Now using basic math we compute 2^n: + * 2^(n+f) = 2^n * 2^f + * 2^f - we use a lookup table + * 2^n - can be replaced with bit shift right operations + */ + + f = (n >> 6) & 0xf; + n >>= 10; + + if (n < RTE_RED_SCALING) + return (uint16_t) ((rte_red_pow2_frac_inv[f] + (1 << (n - 1))) >> n); + + return 0; +} + +/** + * @brief Updates queue average when the queue is empty + * + * Note: the packet is never dropped in this particular case. 
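+ * While the queue stays empty, avg decays approximately as
+ * avg = avg * (1 - wq)^m, with m estimated from the idle time as
+ * described below.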
+ * + * @param [in] config pointer to a RED configuration parameter structure + * @param [in,out] data pointer to RED runtime data + * @param [in] time current time stamp + * + * @return Operation status + * @retval 0 enqueue the packet + * @retval 1 drop the packet based on max threshold criterion + * @retval 2 drop the packet based on mark probability criterion + */ +static inline int +rte_red_enqueue_empty(const struct rte_red_config *red_cfg, + struct rte_red *red, + const uint64_t time) +{ + uint64_t time_diff = 0, m = 0; + + RTE_RED_ASSERT(red_cfg != NULL); + RTE_RED_ASSERT(red != NULL); + + red->count ++; + + /** + * We compute avg but we don't compare avg against + * min_th or max_th, nor calculate drop probability + */ + time_diff = time - red->q_time; + + /** + * m is the number of packets that might have arrived while the queue was empty. + * In this case we have time stamps provided by scheduler in byte units (bytes + * transmitted on network port). Such time stamp translates into time units as + * port speed is fixed but such approach simplifies the code. + */ + m = time_diff / RTE_RED_S; + + /** + * Check that m will fit into 16-bit unsigned integer + */ + if (m >= RTE_RED_2POW16) { + red->avg = 0; + } else { + red->avg = (red->avg >> RTE_RED_SCALING) * __rte_red_calc_qempty_factor(red_cfg->wq_log2, (uint16_t) m); + } + + return 0; +} + +/** + * Drop probability (Sally Floyd and Van Jacobson): + * + * pb = (1 / maxp_inv) * (avg - min_th) / (max_th - min_th) + * pa = pb / (2 - count * pb) + * + * + * (1 / maxp_inv) * (avg - min_th) + * --------------------------------- + * max_th - min_th + * pa = ----------------------------------------------- + * count * (1 / maxp_inv) * (avg - min_th) + * 2 - ----------------------------------------- + * max_th - min_th + * + * + * avg - min_th + * pa = ----------------------------------------------------------- + * 2 * (max_th - min_th) * maxp_inv - count * (avg - min_th) + * + * + * We define pa_const as: pa_const = 2 * (max_th - min_th) * maxp_inv. 
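+ * (For example, with the sample profile's green parameters min_th = 48,
+ * max_th = 64, maxp_inv = 10: pa_const = 2 * (64 - 48) * 10 = 320, stored
+ * left-shifted by RTE_RED_SCALING by rte_red_config_init().)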
+ * Then:
+ *
+ *
+ *                   avg - min_th
+ *   pa = -----------------------------------
+ *         pa_const - count * (avg - min_th)
+ */
+
+/**
+ * @brief make a decision to drop or enqueue a packet based on mark probability
+ *        criteria
+ *
+ * @param [in] red_cfg pointer to structure defining RED parameters
+ * @param [in,out] red pointer to RED runtime data
+ *
+ * @return operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet
+ */
+static inline int
+__rte_red_drop(const struct rte_red_config *red_cfg, struct rte_red *red)
+{
+	uint32_t pa_num = 0;	/* numerator of drop-probability */
+	uint32_t pa_den = 0;	/* denominator of drop-probability */
+	uint32_t pa_num_count = 0;
+
+	pa_num = (red->avg - red_cfg->min_th) >> (red_cfg->wq_log2);
+
+	pa_num_count = red->count * pa_num;
+
+	if (red_cfg->pa_const <= pa_num_count)
+		return 1;
+
+	pa_den = red_cfg->pa_const - pa_num_count;
+
+	/* If drop, generate and save random number to be used next time */
+	if (unlikely((rte_red_rand_val % pa_den) < pa_num)) {
+		rte_red_rand_val = rte_fast_rand();
+
+		return 1;
+	}
+
+	/* No drop */
+	return 0;
+}
+
+/**
+ * @brief Decides if new packet should be enqueued or dropped in queue non-empty case
+ *
+ * @param [in] red_cfg pointer to a RED configuration parameter structure
+ * @param [in,out] red pointer to RED runtime data
+ * @param [in] q current queue size (measured in packets)
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet based on max threshold criterion
+ * @retval 2 drop the packet based on mark probability criterion
+ */
+static inline int
+rte_red_enqueue_nonempty(const struct rte_red_config *red_cfg,
+	struct rte_red *red,
+	const unsigned q)
+{
+	RTE_RED_ASSERT(red_cfg != NULL);
+	RTE_RED_ASSERT(red != NULL);
+
+	/**
+	 * EWMA filter (Sally Floyd and Van Jacobson):
+	 *   avg = (1 - wq) * avg + wq * q
+	 *   avg = avg + q * wq - avg * wq
+	 *
+	 * We select: wq = 2^(-n). Let scaled version of avg be: avg_s = avg * 2^(N+n). We get:
+	 *   avg_s = avg_s + q * 2^N - avg_s * 2^(-n)
+	 *
+	 * By using shift left/right operations, we get:
+	 *   avg_s = avg_s + (q << N) - (avg_s >> n)
+	 *   avg_s += (q << N) - (avg_s >> n)
+	 */
+
+	/* avg update */
+	red->avg += (q << RTE_RED_SCALING) - (red->avg >> red_cfg->wq_log2);
+
+	/* avg < min_th: do not mark the packet */
+	if (red->avg < red_cfg->min_th) {
+		red->count ++;
+		return 0;
+	}
+
+	/* min_th <= avg < max_th: mark the packet with pa probability */
+	if (red->avg < red_cfg->max_th) {
+		if (!__rte_red_drop(red_cfg, red)) {
+			red->count ++;
+			return 0;
+		}
+
+		red->count = 0;
+		return 2;
+	}
+
+	/* max_th <= avg: always mark the packet */
+	red->count = 0;
+	return 1;
+}
+
+/**
+ * @brief Decides if new packet should be enqueued or dropped.
+ * Updates run time data based on the new queue size value.
+ * Based on the new queue average and the RED configuration parameters,
+ * gives a verdict whether to enqueue or drop the packet.
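+ *
+ * Illustrative usage sketch (not part of the original header; the caller-side
+ * names pkt, q_len, now, enqueue_pkt and drop_pkt are hypothetical):
+ *
+ *   if (rte_red_enqueue(&red_cfg, &red, q_len, now) == 0)
+ *       enqueue_pkt(pkt);   // accept the packet
+ *   else
+ *       drop_pkt(pkt);      // RED drop (return value 1 or 2)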
+ *
+ * @param [in] red_cfg pointer to a RED configuration parameter structure
+ * @param [in,out] red pointer to RED runtime data
+ * @param [in] q updated queue size in packets
+ * @param [in] time current time stamp
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet based on max threshold criterion
+ * @retval 2 drop the packet based on mark probability criterion
+ */
+static inline int
+rte_red_enqueue(const struct rte_red_config *red_cfg,
+	struct rte_red *red,
+	const unsigned q,
+	const uint64_t time)
+{
+	RTE_RED_ASSERT(red_cfg != NULL);
+	RTE_RED_ASSERT(red != NULL);
+
+	if (q != 0) {
+		return rte_red_enqueue_nonempty(red_cfg, red, q);
+	} else {
+		return rte_red_enqueue_empty(red_cfg, red, time);
+	}
+}
+
+/**
+ * @brief Callback to record the time that the queue became empty
+ *
+ * @param [in,out] red pointer to RED runtime data
+ * @param [in] time current time stamp
+ */
+static inline void
+rte_red_mark_queue_empty(struct rte_red *red, const uint64_t time)
+{
+	red->q_time = time;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __RTE_RED_H_INCLUDED__ */
diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c
new file mode 100644
index 0000000000..daa1a0d9e3
--- /dev/null
+++ b/lib/librte_sched/rte_sched.c
@@ -0,0 +1,2129 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_memory.h>
+#include <rte_malloc.h>
+#include <rte_cycles.h>
+#include <rte_prefetch.h>
+#include <rte_branch_prediction.h>
+#include <rte_mbuf.h>
+
+#include "rte_sched.h"
+#include "rte_bitmap.h"
+#include "rte_sched_common.h"
+#include "rte_approx.h"
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:2259) /* conversion may lose significant bits */
+#endif
+
+#ifndef RTE_SCHED_DEBUG
+#define RTE_SCHED_DEBUG 0
+#endif
+
+#ifndef RTE_SCHED_OPTIMIZATIONS
+#define RTE_SCHED_OPTIMIZATIONS 0
+#endif
+
+#if RTE_SCHED_OPTIMIZATIONS
+#include <immintrin.h>
+#endif
+
+#define RTE_SCHED_ENQUEUE 1
+
+#define RTE_SCHED_TS 1
+
+#if RTE_SCHED_TS == 0 /* Infinite credits. Traffic shaping disabled. */
+#define RTE_SCHED_TS_CREDITS_UPDATE 0
+#define RTE_SCHED_TS_CREDITS_CHECK 0
+#else                 /* Real Credits. Full traffic shaping implemented. */
+#define RTE_SCHED_TS_CREDITS_UPDATE 1
+#define RTE_SCHED_TS_CREDITS_CHECK 1
+#endif
+
+#ifndef RTE_SCHED_TB_RATE_CONFIG_ERR
+#define RTE_SCHED_TB_RATE_CONFIG_ERR (1e-7)
+#endif
+
+#define RTE_SCHED_WRR 1
+
+#ifndef RTE_SCHED_WRR_SHIFT
+#define RTE_SCHED_WRR_SHIFT 3
+#endif
+
+#ifndef RTE_SCHED_PORT_N_GRINDERS
+#define RTE_SCHED_PORT_N_GRINDERS 8
+#endif
+#if (RTE_SCHED_PORT_N_GRINDERS == 0) || (RTE_SCHED_PORT_N_GRINDERS & (RTE_SCHED_PORT_N_GRINDERS - 1))
+#error Number of grinders must be non-zero and a power of 2
+#endif
+#if (RTE_SCHED_OPTIMIZATIONS && (RTE_SCHED_PORT_N_GRINDERS != 8))
+#error Number of grinders must be 8 when RTE_SCHED_OPTIMIZATIONS is set
+#endif
+
+#define RTE_SCHED_GRINDER_PCACHE_SIZE (64 / RTE_SCHED_QUEUES_PER_PIPE)
+
+#define RTE_SCHED_PIPE_INVALID UINT32_MAX
+
+#define RTE_SCHED_BMP_POS_INVALID UINT32_MAX
+
+struct rte_sched_subport {
+	/* Token bucket (TB) */
+	uint64_t tb_time; /* time of last update */
+	uint32_t tb_period;
+	uint32_t tb_credits_per_period;
+	uint32_t tb_size;
+	uint32_t tb_credits;
+
+	/* Traffic classes (TCs) */
+	uint64_t tc_time; /* time of next update */
+	uint32_t tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	uint32_t tc_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	uint32_t tc_period;
+
+	/* TC oversubscription */
+	uint32_t tc_ov_period;
+	uint64_t tc_ov_time;
+	uint32_t tc_ov_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	uint8_t tc_ov_period_id;
+	uint8_t tc_ov[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	uint32_t tc_ov_n[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	double tc_ov_rate[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+
+	/* Statistics */
+	struct rte_sched_subport_stats stats;
+};
+
+struct rte_sched_pipe_profile {
+	/* Token bucket (TB) */
+	uint32_t tb_period;
+	uint32_t tb_credits_per_period;
+	uint32_t tb_size;
+
+	/* Pipe traffic classes */
+	uint32_t tc_period;
+	uint32_t tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	uint8_t tc_ov_weight[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+
+	/* Pipe queues */
+	uint8_t wrr_cost[RTE_SCHED_QUEUES_PER_PIPE];
+};
+
+struct rte_sched_pipe {
+	/* Token bucket (TB) */
+	uint64_t tb_time; /* time of last update */
+	uint32_t tb_credits;
+
+	/* Pipe profile and flags */
+	uint32_t profile;
+
+	/* Traffic classes (TCs) */
+	uint64_t tc_time; /* time of next update */
+	uint32_t tc_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+
+	/* Weighted Round Robin (WRR) */
+	uint8_t wrr_tokens[RTE_SCHED_QUEUES_PER_PIPE];
+
+	/* TC oversubscription */
+#ifdef RTE_SCHED_SUBPORT_TC_OV
+	uint32_t tc_ov_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	uint8_t tc_ov_period_id;
+#else
+	uint64_t reserved;
+#endif
+} __rte_cache_aligned;
+
+struct rte_sched_queue {
+	uint16_t qw;
+	uint16_t qr;
+};
+
+struct rte_sched_queue_extra {
+	struct rte_sched_queue_stats stats;
+#ifdef RTE_SCHED_RED
+	struct rte_red red;
+#endif
+};
+
+enum grinder_state {
+	e_GRINDER_PREFETCH_PIPE = 0,
+	e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS,
+	e_GRINDER_PREFETCH_MBUF,
+	e_GRINDER_READ_MBUF
+};
+
+struct rte_sched_grinder {
+	/* Pipe cache */
+	uint16_t pcache_qmask[RTE_SCHED_GRINDER_PCACHE_SIZE];
+	uint32_t pcache_qindex[RTE_SCHED_GRINDER_PCACHE_SIZE];
+	uint32_t pcache_w;
+	uint32_t pcache_r;
+
+	/* Current pipe */
+	enum grinder_state state;
+	uint32_t productive;
+	uint32_t pindex;
+	struct rte_sched_subport *subport;
+	struct rte_sched_pipe *pipe;
+	struct rte_sched_pipe_profile *pipe_params;
+
+	/* TC cache */
+	uint8_t tccache_qmask[4];
+	uint32_t tccache_qindex[4];
+	uint32_t tccache_w;
+	uint32_t tccache_r;
+
+	/* Current TC */
+	uint32_t tc_index;
+	struct rte_sched_queue *queue[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	struct rte_mbuf **qbase[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	uint32_t qindex[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	uint16_t qsize;
+	uint32_t qmask;
+	uint32_t qpos;
+	struct rte_mbuf *pkt;
+
+	double ov_coef;
+
+	uint16_t wrr_tokens[RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS];
+	uint16_t wrr_mask[RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS];
+	uint8_t wrr_cost[RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS];
+};
+
+struct rte_sched_port {
+	/* User parameters */
+	uint32_t n_subports_per_port;
+	uint32_t n_pipes_per_subport;
+	uint32_t rate;
+	uint32_t frame_overhead;
+	uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	uint32_t n_pipe_profiles;
+#ifdef RTE_SCHED_RED
+	struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][e_RTE_METER_COLORS];
+#endif
+
+	/* Timing */
+	uint64_t time_cpu_cycles; /* Current CPU time measured in CPU cycles */
+	uint64_t time_cpu_bytes;  /* Current CPU time measured in bytes */
+	uint64_t time;            /* Current NIC TX time measured in bytes */
+	double cycles_per_byte;   /* CPU cycles per byte */
+
+	/* Scheduling loop detection */
+	uint32_t pipe_loop;
+	uint32_t pipe_exhaustion;
+
+	/* Bitmap */
+	struct rte_bitmap bmp;
+	uint32_t grinder_base_bmp_pos[RTE_SCHED_PORT_N_GRINDERS] __rte_aligned_16;
+
+	/* Grinders */
+	struct rte_sched_grinder grinder[RTE_SCHED_PORT_N_GRINDERS];
+	uint32_t busy_grinders;
+	struct rte_mbuf **pkts_out;
+	uint32_t n_pkts_out;
+
+	/* Queue base calculation */
+	uint32_t qsize_add[RTE_SCHED_QUEUES_PER_PIPE];
+	uint32_t qsize_sum;
+
+	/* Large data structures */
+	struct rte_sched_subport *subport;
+	struct rte_sched_pipe *pipe;
+	struct rte_sched_queue *queue;
+	struct rte_sched_queue_extra *queue_extra;
+	struct rte_sched_pipe_profile *pipe_profiles;
+	uint8_t *bmp_array;
+	struct rte_mbuf **queue_array;
+	uint8_t memory[0] __rte_cache_aligned;
+} __rte_cache_aligned;
+
+enum rte_sched_port_array {
+	e_RTE_SCHED_PORT_ARRAY_SUBPORT = 0,
+	e_RTE_SCHED_PORT_ARRAY_PIPE,
+	e_RTE_SCHED_PORT_ARRAY_QUEUE,
+	e_RTE_SCHED_PORT_ARRAY_QUEUE_EXTRA,
+	e_RTE_SCHED_PORT_ARRAY_PIPE_PROFILES,
+	e_RTE_SCHED_PORT_ARRAY_BMP_ARRAY,
+	e_RTE_SCHED_PORT_ARRAY_QUEUE_ARRAY,
+	e_RTE_SCHED_PORT_ARRAY_TOTAL,
+};
+
+#ifdef RTE_SCHED_COLLECT_STATS
+
+static inline uint32_t
+rte_sched_port_queues_per_subport(struct rte_sched_port *port)
+{
+	return RTE_SCHED_QUEUES_PER_PIPE * port->n_pipes_per_subport;
+}
+
+#endif
+
+static inline uint32_t
+rte_sched_port_queues_per_port(struct rte_sched_port *port)
+{
+	return RTE_SCHED_QUEUES_PER_PIPE * port->n_pipes_per_subport * port->n_subports_per_port;
+}
+
+static int
+rte_sched_port_check_params(struct rte_sched_port_params *params)
+{
+	uint32_t
i, j; + + if (params == NULL) { + return -1; + } + + /* name */ + if (params->name == NULL) { + return -2; + } + + /* socket */ + if ((params->socket < 0) || (params->socket >= RTE_MAX_NUMA_NODES)) { + return -3; + } + + /* rate */ + if (params->rate == 0) { + return -4; + } + + /* n_subports_per_port: non-zero, power of 2 */ + if ((params->n_subports_per_port == 0) || (!rte_is_power_of_2(params->n_subports_per_port))) { + return -5; + } + + /* n_pipes_per_subport: non-zero, power of 2 */ + if ((params->n_pipes_per_subport == 0) || (!rte_is_power_of_2(params->n_pipes_per_subport))) { + return -6; + } + + /* qsize: non-zero, power of 2, no bigger than 32K (due to 16-bit read/write pointers) */ + for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i ++) { + uint16_t qsize = params->qsize[i]; + + if ((qsize == 0) || (!rte_is_power_of_2(qsize))) { + return -7; + } + } + + /* pipe_profiles and n_pipe_profiles */ + if ((params->pipe_profiles == NULL) || + (params->n_pipe_profiles == 0) || + (params->n_pipe_profiles > RTE_SCHED_PIPE_PROFILES_PER_PORT)) { + return -8; + } + + for (i = 0; i < params->n_pipe_profiles; i ++) { + struct rte_sched_pipe_params *p = params->pipe_profiles + i; + + /* TB rate: non-zero, not greater than port rate */ + if ((p->tb_rate == 0) || (p->tb_rate > params->rate)) { + return -9; + } + + /* TB size: non-zero */ + if (p->tb_size == 0) { + return -10; + } + + /* TC rate: non-zero, less than pipe rate */ + for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j ++) { + if ((p->tc_rate[j] == 0) || (p->tc_rate[j] > p->tb_rate)) { + return -11; + } + } + + /* TC period: non-zero */ + if (p->tc_period == 0) { + return -12; + } + +#ifdef RTE_SCHED_SUBPORT_TC_OV + /* TC oversubscription weights: non-zero */ + for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j ++) { + if (p->tc_ov_weight[j] == 0) { + return -13; + } + } +#endif + + /* Queue WRR weights: non-zero */ + for (j = 0; j < RTE_SCHED_QUEUES_PER_PIPE; j ++) { + if (p->wrr_weights[j] == 0) { + return -14; + } + } + } + + return 0; +} + +static uint32_t +rte_sched_port_get_array_base(struct rte_sched_port_params *params, enum rte_sched_port_array array) +{ + uint32_t n_subports_per_port = params->n_subports_per_port; + uint32_t n_pipes_per_subport = params->n_pipes_per_subport; + uint32_t n_pipes_per_port = n_pipes_per_subport * n_subports_per_port; + uint32_t n_queues_per_port = RTE_SCHED_QUEUES_PER_PIPE * n_pipes_per_subport * n_subports_per_port; + + uint32_t size_subport = n_subports_per_port * sizeof(struct rte_sched_subport); + uint32_t size_pipe = n_pipes_per_port * sizeof(struct rte_sched_pipe); + uint32_t size_queue = n_queues_per_port * sizeof(struct rte_sched_queue); + uint32_t size_queue_extra = n_queues_per_port * sizeof(struct rte_sched_queue_extra); + uint32_t size_pipe_profiles = RTE_SCHED_PIPE_PROFILES_PER_PORT * sizeof(struct rte_sched_pipe_profile); + uint32_t size_bmp_array = n_queues_per_port / 8; + uint32_t size_per_pipe_queue_array, size_queue_array; + + uint32_t base, i; + + size_per_pipe_queue_array = 0; + for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i ++) { + size_per_pipe_queue_array += RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS * params->qsize[i] * sizeof(struct rte_mbuf *); + } + size_queue_array = n_pipes_per_port * size_per_pipe_queue_array; + + base = 0; + + if (array == e_RTE_SCHED_PORT_ARRAY_SUBPORT) return base; + base += CACHE_LINE_ROUNDUP(size_subport); + + if (array == e_RTE_SCHED_PORT_ARRAY_PIPE) return base; + base += CACHE_LINE_ROUNDUP(size_pipe); + + if (array == 
e_RTE_SCHED_PORT_ARRAY_QUEUE) return base; + base += CACHE_LINE_ROUNDUP(size_queue); + + if (array == e_RTE_SCHED_PORT_ARRAY_QUEUE_EXTRA) return base; + base += CACHE_LINE_ROUNDUP(size_queue_extra); + + if (array == e_RTE_SCHED_PORT_ARRAY_PIPE_PROFILES) return base; + base += CACHE_LINE_ROUNDUP(size_pipe_profiles); + + if (array == e_RTE_SCHED_PORT_ARRAY_BMP_ARRAY) return base; + base += CACHE_LINE_ROUNDUP(size_bmp_array); + + if (array == e_RTE_SCHED_PORT_ARRAY_QUEUE_ARRAY) return base; + base += CACHE_LINE_ROUNDUP(size_queue_array); + + return base; +} + +uint32_t +rte_sched_port_get_memory_footprint(struct rte_sched_port_params *params) +{ + uint32_t size0, size1; + int status; + + status = rte_sched_port_check_params(params); + if (status != 0) { + RTE_LOG(INFO, SCHED, "Port scheduler params check failed (%d)\n", status); + + return 0; + } + + size0 = sizeof(struct rte_sched_port); + size1 = rte_sched_port_get_array_base(params, e_RTE_SCHED_PORT_ARRAY_TOTAL); + + return (size0 + size1); +} + +static void +rte_sched_port_config_qsize(struct rte_sched_port *port) +{ + /* TC 0 */ + port->qsize_add[0] = 0; + port->qsize_add[1] = port->qsize_add[0] + port->qsize[0]; + port->qsize_add[2] = port->qsize_add[1] + port->qsize[0]; + port->qsize_add[3] = port->qsize_add[2] + port->qsize[0]; + + /* TC 1 */ + port->qsize_add[4] = port->qsize_add[3] + port->qsize[0]; + port->qsize_add[5] = port->qsize_add[4] + port->qsize[1]; + port->qsize_add[6] = port->qsize_add[5] + port->qsize[1]; + port->qsize_add[7] = port->qsize_add[6] + port->qsize[1]; + + /* TC 2 */ + port->qsize_add[8] = port->qsize_add[7] + port->qsize[1]; + port->qsize_add[9] = port->qsize_add[8] + port->qsize[2]; + port->qsize_add[10] = port->qsize_add[9] + port->qsize[2]; + port->qsize_add[11] = port->qsize_add[10] + port->qsize[2]; + + /* TC 3 */ + port->qsize_add[12] = port->qsize_add[11] + port->qsize[2]; + port->qsize_add[13] = port->qsize_add[12] + port->qsize[3]; + port->qsize_add[14] = port->qsize_add[13] + port->qsize[3]; + port->qsize_add[15] = port->qsize_add[14] + port->qsize[3]; + + port->qsize_sum = port->qsize_add[15] + port->qsize[3]; +} + +static void +rte_sched_port_log_pipe_profile(struct rte_sched_port *port, uint32_t i) +{ + struct rte_sched_pipe_profile *p = port->pipe_profiles + i; + + RTE_LOG(INFO, SCHED, "Low level config for pipe profile %u:\n" + "\tToken bucket: period = %u, credits per period = %u, size = %u\n" + "\tTraffic classes: period = %u, credits per period = [%u, %u, %u, %u], ov weights = [%hhu, %hhu, %hhu, %hhu]\n" + "\tWRR cost: [%hhu, %hhu, %hhu, %hhu], [%hhu, %hhu, %hhu, %hhu], [%hhu, %hhu, %hhu, %hhu], [%hhu, %hhu, %hhu, %hhu]\n", + i, + + /* Token bucket */ + p->tb_period, + p->tb_credits_per_period, + p->tb_size, + + /* Traffic classes */ + p->tc_period, + p->tc_credits_per_period[0], + p->tc_credits_per_period[1], + p->tc_credits_per_period[2], + p->tc_credits_per_period[3], + p->tc_ov_weight[0], + p->tc_ov_weight[1], + p->tc_ov_weight[2], + p->tc_ov_weight[3], + + /* WRR */ + p->wrr_cost[ 0], p->wrr_cost[ 1], p->wrr_cost[ 2], p->wrr_cost[ 3], + p->wrr_cost[ 4], p->wrr_cost[ 5], p->wrr_cost[ 6], p->wrr_cost[ 7], + p->wrr_cost[ 8], p->wrr_cost[ 9], p->wrr_cost[10], p->wrr_cost[11], + p->wrr_cost[12], p->wrr_cost[13], p->wrr_cost[14], p->wrr_cost[15]); +} + +static inline uint64_t +rte_sched_time_ms_to_bytes(uint32_t time_ms, uint32_t rate) +{ + uint64_t time = time_ms; + time = (time * rate) / 1000; + + return time; +} + +static void +rte_sched_port_config_pipe_profile_table(struct 
rte_sched_port *port, struct rte_sched_port_params *params) +{ + uint32_t i, j; + + for (i = 0; i < port->n_pipe_profiles; i ++) { + struct rte_sched_pipe_params *src = params->pipe_profiles + i; + struct rte_sched_pipe_profile *dst = port->pipe_profiles + i; + + /* Token Bucket */ + if (src->tb_rate == params->rate) { + dst->tb_credits_per_period = 1; + dst->tb_period = 1; + } else { + double tb_rate = ((double) src->tb_rate) / ((double) params->rate); + double d = RTE_SCHED_TB_RATE_CONFIG_ERR; + + rte_approx(tb_rate, d, &dst->tb_credits_per_period, &dst->tb_period); + } + dst->tb_size = src->tb_size; + + /* Traffic Classes */ + dst->tc_period = (uint32_t) rte_sched_time_ms_to_bytes(src->tc_period, params->rate); + for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j ++) { + dst->tc_credits_per_period[j] = (uint32_t) rte_sched_time_ms_to_bytes(src->tc_period, src->tc_rate[j]); + } +#ifdef RTE_SCHED_SUBPORT_TC_OV + for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j ++) { + dst->tc_ov_weight[j] = src->tc_ov_weight[j]; + } +#endif + + /* WRR */ + for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j ++) { + uint32_t wrr_cost[RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS]; + uint32_t lcd, lcd1, lcd2; + uint32_t qindex; + + qindex = j * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; + + wrr_cost[0] = src->wrr_weights[qindex]; + wrr_cost[1] = src->wrr_weights[qindex + 1]; + wrr_cost[2] = src->wrr_weights[qindex + 2]; + wrr_cost[3] = src->wrr_weights[qindex + 3]; + + lcd1 = rte_get_lcd(wrr_cost[0], wrr_cost[1]); + lcd2 = rte_get_lcd(wrr_cost[2], wrr_cost[3]); + lcd = rte_get_lcd(lcd1, lcd2); + + wrr_cost[0] = lcd / wrr_cost[0]; + wrr_cost[1] = lcd / wrr_cost[1]; + wrr_cost[2] = lcd / wrr_cost[2]; + wrr_cost[3] = lcd / wrr_cost[3]; + + dst->wrr_cost[qindex] = (uint8_t) wrr_cost[0]; + dst->wrr_cost[qindex + 1] = (uint8_t) wrr_cost[1]; + dst->wrr_cost[qindex + 2] = (uint8_t) wrr_cost[2]; + dst->wrr_cost[qindex + 3] = (uint8_t) wrr_cost[3]; + } + + rte_sched_port_log_pipe_profile(port, i); + } +} + +struct rte_sched_port * +rte_sched_port_config(struct rte_sched_port_params *params) +{ + struct rte_sched_port *port = NULL; + const struct rte_memzone *mz = NULL; + uint32_t mem_size, i; + + /* Check user parameters. 
Determine the amount of memory to allocate */ + mem_size = rte_sched_port_get_memory_footprint(params); + if (mem_size == 0) { + return NULL; + } + + /* Allocate memory to store the data structures */ + mz = rte_memzone_lookup(params->name); + if (mz) { + /* Use existing memzone, provided that its size is big enough */ + if (mz->len < mem_size) { + return NULL; + } + } else { + /* Create new memzone */ + mz = rte_memzone_reserve(params->name, mem_size, params->socket, 0); + if (mz == NULL) { + return NULL; + } + } + memset(mz->addr, 0, mem_size); + port = (struct rte_sched_port *) mz->addr; + + /* User parameters */ + port->n_subports_per_port = params->n_subports_per_port; + port->n_pipes_per_subport = params->n_pipes_per_subport; + port->rate = params->rate; + port->frame_overhead = params->frame_overhead; + memcpy(port->qsize, params->qsize, sizeof(params->qsize)); + port->n_pipe_profiles = params->n_pipe_profiles; + +#ifdef RTE_SCHED_RED + for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) { + uint32_t j; + + for (j = 0; j < e_RTE_METER_COLORS; j++) { + if (rte_red_config_init(&port->red_config[i][j], + params->red_params[i][j].wq_log2, + params->red_params[i][j].min_th, + params->red_params[i][j].max_th, + params->red_params[i][j].maxp_inv) != 0) { + return NULL; + } + } + } +#endif + + /* Timing */ + port->time_cpu_cycles = rte_get_tsc_cycles(); + port->time_cpu_bytes = 0; + port->time = 0; + port->cycles_per_byte = ((double) rte_get_tsc_hz()) / ((double) params->rate); + + /* Scheduling loop detection */ + port->pipe_loop = RTE_SCHED_PIPE_INVALID; + port->pipe_exhaustion = 0; + + /* Grinders */ + port->busy_grinders = 0; + port->pkts_out = NULL; + port->n_pkts_out = 0; + + /* Queue base calculation */ + rte_sched_port_config_qsize(port); + + /* Large data structures */ + port->subport = (struct rte_sched_subport *) (port->memory + rte_sched_port_get_array_base(params, e_RTE_SCHED_PORT_ARRAY_SUBPORT)); + port->pipe = (struct rte_sched_pipe *) (port->memory + rte_sched_port_get_array_base(params, e_RTE_SCHED_PORT_ARRAY_PIPE)); + port->queue = (struct rte_sched_queue *) (port->memory + rte_sched_port_get_array_base(params, e_RTE_SCHED_PORT_ARRAY_QUEUE)); + port->queue_extra = (struct rte_sched_queue_extra *) (port->memory + rte_sched_port_get_array_base(params, e_RTE_SCHED_PORT_ARRAY_QUEUE_EXTRA)); + port->pipe_profiles = (struct rte_sched_pipe_profile *) (port->memory + rte_sched_port_get_array_base(params, e_RTE_SCHED_PORT_ARRAY_PIPE_PROFILES)); + port->bmp_array = port->memory + rte_sched_port_get_array_base(params, e_RTE_SCHED_PORT_ARRAY_BMP_ARRAY); + port->queue_array = (struct rte_mbuf **) (port->memory + rte_sched_port_get_array_base(params, e_RTE_SCHED_PORT_ARRAY_QUEUE_ARRAY)); + + /* Pipe profile table */ + rte_sched_port_config_pipe_profile_table(port, params); + + /* Bitmap */ + if (rte_bitmap_init(&port->bmp, port->bmp_array, rte_sched_port_queues_per_port(port)) != 0) { + RTE_LOG(INFO, SCHED, "Bitmap init error\n"); + return NULL; + } + for (i = 0; i < RTE_SCHED_PORT_N_GRINDERS; i ++) { + port->grinder_base_bmp_pos[i] = RTE_SCHED_PIPE_INVALID; + } + + return port; +} + +void +rte_sched_port_free(struct rte_sched_port *port) +{ + /* Check user parameters */ + if (port == NULL){ + return; + } + rte_bitmap_free(&port->bmp); + + return; +} + +static void +rte_sched_port_log_subport_config(struct rte_sched_port *port, uint32_t i) +{ + struct rte_sched_subport *s = port->subport + i; + + RTE_LOG(INFO, SCHED, "Low level config for subport %u:\n" + "\tToken bucket: period = 
%u, credits per period = %u, size = %u\n" + "\tTraffic classes: period = %u, credits per period = [%u, %u, %u, %u], ov period = %u\n", + i, + + /* Token bucket */ + s->tb_period, + s->tb_credits_per_period, + s->tb_size, + + /* Traffic classes */ + s->tc_period, + s->tc_credits_per_period[0], + s->tc_credits_per_period[1], + s->tc_credits_per_period[2], + s->tc_credits_per_period[3], + s->tc_ov_period); +} + +int +rte_sched_subport_config(struct rte_sched_port *port, + uint32_t subport_id, + struct rte_sched_subport_params *params) +{ + struct rte_sched_subport *s; + uint32_t i; + + /* Check user parameters */ + if ((port == NULL) || + (subport_id >= port->n_subports_per_port) || + (params == NULL)) { + return -1; + } + + if ((params->tb_rate == 0) || (params->tb_rate > port->rate)) { + return -2; + } + + if (params->tb_size == 0) { + return -3; + } + + for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i ++) { + if ((params->tc_rate[i] == 0) || (params->tc_rate[i] > params->tb_rate)) { + return -4; + } + } + + if (params->tc_period == 0) { + return -5; + } + +#ifdef RTE_SCHED_SUBPORT_TC_OV + if ((params->tc_ov_period == 0) || (params->tc_ov_period > params->tc_period)) { + return -6; + } +#endif + + s = port->subport + subport_id; + + /* Token Bucket (TB) */ + if (params->tb_rate == port->rate) { + s->tb_credits_per_period = 1; + s->tb_period = 1; + } else { + double tb_rate = ((double) params->tb_rate) / ((double) port->rate); + double d = RTE_SCHED_TB_RATE_CONFIG_ERR; + + rte_approx(tb_rate, d, &s->tb_credits_per_period, &s->tb_period); + } + s->tb_size = params->tb_size; + s->tb_time = port->time; + s->tb_credits = s->tb_size / 2; + + /* Traffic Classes (TCs) */ + s->tc_period = (uint32_t) rte_sched_time_ms_to_bytes(params->tc_period, port->rate); + for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i ++) { + s->tc_credits_per_period[i] = (uint32_t) rte_sched_time_ms_to_bytes(params->tc_period, params->tc_rate[i]); + } + s->tc_time = port->time + s->tc_period; + for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i ++) { + s->tc_credits[i] = s->tc_credits_per_period[i]; + } + +#ifdef RTE_SCHED_SUBPORT_TC_OV + /* TC oversubscription */ + s->tc_ov_period = (uint32_t) rte_sched_time_ms_to_bytes(params->tc_ov_period, port->rate); + s->tc_ov_time = port->time + s->tc_ov_period; + s->tc_ov_period_id = 0; + for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i ++) { + s->tc_ov[i] = 0; + s->tc_ov_n[i] = 0; + s->tc_ov_rate[i] = 0; + s->tc_ov_credits[i] = 0; + } +#endif + + rte_sched_port_log_subport_config(port, subport_id); + + return 0; +} + +int +rte_sched_pipe_config(struct rte_sched_port *port, + uint32_t subport_id, + uint32_t pipe_id, + int32_t pipe_profile) +{ + struct rte_sched_subport *s; + struct rte_sched_pipe *p; + struct rte_sched_pipe_profile *params; + uint32_t deactivate, profile, i; + + /* Check user parameters */ + profile = (uint32_t) pipe_profile; + deactivate = (pipe_profile < 0); + if ((port == NULL) || + (subport_id >= port->n_subports_per_port) || + (pipe_id >= port->n_pipes_per_subport) || + ((!deactivate) && (profile >= port->n_pipe_profiles))) { + return -1; + } + + /* Check that subport configuration is valid */ + s = port->subport + subport_id; + if (s->tb_period == 0) { + return -2; + } + + p = port->pipe + (subport_id * port->n_pipes_per_subport + pipe_id); + + /* Handle the case when pipe already has a valid configuration */ + if (p->tb_time) { + params = port->pipe_profiles + p->profile; + +#ifdef RTE_SCHED_SUBPORT_TC_OV + /* Unplug pipe from its subport */ + 
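/* (Clarifying note: tc_ov_n counts the pipes, weighted by
+		 * tc_ov_weight, that may oversubscribe each subport TC, and tc_ov_rate
+		 * tracks their aggregate demanded rate; both are reduced here so the
+		 * subport tc_ov[] flags can be recomputed without this pipe.) */
+		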
for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i ++) {
+			s->tc_ov_n[i] -= params->tc_ov_weight[i];
+			s->tc_ov_rate[i] -= ((double) params->tc_credits_per_period[i]) / ((double) params->tc_period);
+			s->tc_ov[i] = s->tc_ov_rate[i] > (((double) s->tc_credits_per_period[i]) / ((double) s->tc_period));
+		}
+#endif
+
+		/* Reset the pipe */
+		memset(p, 0, sizeof(struct rte_sched_pipe));
+	}
+
+	if (deactivate) {
+		return 0;
+	}
+
+	/* Apply the new pipe configuration */
+	p->profile = profile;
+	params = port->pipe_profiles + p->profile;
+
+	/* Token Bucket (TB) */
+	p->tb_time = port->time;
+	p->tb_credits = params->tb_size / 2;
+
+	/* Traffic Classes (TCs) */
+	p->tc_time = port->time + params->tc_period;
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i ++) {
+		p->tc_credits[i] = params->tc_credits_per_period[i];
+	}
+
+#ifdef RTE_SCHED_SUBPORT_TC_OV
+	/* Subport TC oversubscription */
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i ++) {
+		s->tc_ov_n[i] += params->tc_ov_weight[i];
+		s->tc_ov_rate[i] += ((double) params->tc_credits_per_period[i]) / ((double) params->tc_period);
+		s->tc_ov[i] = s->tc_ov_rate[i] > (((double) s->tc_credits_per_period[i]) / ((double) s->tc_period));
+	}
+	p->tc_ov_period_id = s->tc_ov_period_id;
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i ++) {
+		p->tc_ov_credits[i] = 0;
+	}
+#endif
+
+	return 0;
+}
+
+int
+rte_sched_subport_read_stats(struct rte_sched_port *port,
+	uint32_t subport_id,
+	struct rte_sched_subport_stats *stats,
+	uint32_t *tc_ov)
+{
+	struct rte_sched_subport *s;
+	uint32_t mask, i;
+
+	/* Check user parameters */
+	if ((port == NULL) ||
+	    (subport_id >= port->n_subports_per_port) ||
+	    (stats == NULL) ||
+	    (tc_ov == NULL)) {
+		return -1;
+	}
+	s = port->subport + subport_id;
+
+	/* Copy subport stats and clear */
+	memcpy(stats, &s->stats, sizeof(struct rte_sched_subport_stats));
+	memset(&s->stats, 0, sizeof(struct rte_sched_subport_stats));
+
+	/* Subport TC oversubscription status */
+	mask = 0;
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i ++) {
+		mask |= ((uint32_t) s->tc_ov[i]) << i;
+	}
+	*tc_ov = mask;
+
+	return 0;
+}
+
+int
+rte_sched_queue_read_stats(struct rte_sched_port *port,
+	uint32_t queue_id,
+	struct rte_sched_queue_stats *stats,
+	uint16_t *qlen)
+{
+	struct rte_sched_queue *q;
+	struct rte_sched_queue_extra *qe;
+
+	/* Check user parameters */
+	if ((port == NULL) ||
+	    (queue_id >= rte_sched_port_queues_per_port(port)) ||
+	    (stats == NULL) ||
+	    (qlen == NULL)) {
+		return -1;
+	}
+	q = port->queue + queue_id;
+	qe = port->queue_extra + queue_id;
+
+	/* Copy queue stats and clear */
+	memcpy(stats, &qe->stats, sizeof(struct rte_sched_queue_stats));
+	memset(&qe->stats, 0, sizeof(struct rte_sched_queue_stats));
+
+	/* Queue length */
+	*qlen = q->qw - q->qr;
+
+	return 0;
+}
+
+static inline uint32_t
+rte_sched_port_qindex(struct rte_sched_port *port, uint32_t subport, uint32_t pipe, uint32_t traffic_class, uint32_t queue)
+{
+	uint32_t result;
+
+	result = subport * port->n_pipes_per_subport + pipe;
+	result = result * RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE + traffic_class;
+	result = result * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + queue;
+
+	return result;
+}
+
+static inline struct rte_mbuf **
+rte_sched_port_qbase(struct rte_sched_port *port, uint32_t qindex)
+{
+	uint32_t pindex = qindex >> 4;
+	uint32_t qpos = qindex & 0xF;
+
+	return (port->queue_array + pindex * port->qsize_sum + port->qsize_add[qpos]);
+}
+
+static inline uint16_t
+rte_sched_port_qsize(struct rte_sched_port *port, uint32_t qindex)
+{
+	uint32_t tc = (qindex >> 2) & 0x3;
+
+	return port->qsize[tc];
+}
+
+#if RTE_SCHED_DEBUG
+
+static inline int
+rte_sched_port_queue_is_empty(struct rte_sched_port *port, uint32_t qindex)
+{
+	struct rte_sched_queue *queue = port->queue + qindex;
+
+	return (queue->qr == queue->qw);
+}
+
+static inline int
+rte_sched_port_queue_is_full(struct rte_sched_port *port, uint32_t qindex)
+{
+	struct rte_sched_queue *queue = port->queue + qindex;
+	uint16_t qsize = rte_sched_port_qsize(port, qindex);
+	uint16_t qlen = queue->qw - queue->qr;
+
+	return (qlen >= qsize);
+}
+
+#endif /* RTE_SCHED_DEBUG */
+
+#ifdef RTE_SCHED_COLLECT_STATS
+
+static inline void
+rte_sched_port_update_subport_stats(struct rte_sched_port *port, uint32_t qindex, struct rte_mbuf *pkt)
+{
+	struct rte_sched_subport *s = port->subport + (qindex / rte_sched_port_queues_per_subport(port));
+	uint32_t tc_index = (qindex >> 2) & 0x3;
+	uint32_t pkt_len = pkt->pkt.pkt_len;
+
+	s->stats.n_pkts_tc[tc_index] += 1;
+	s->stats.n_bytes_tc[tc_index] += pkt_len;
+}
+
+static inline void
+rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port, uint32_t qindex, struct rte_mbuf *pkt)
+{
+	struct rte_sched_subport *s = port->subport + (qindex / rte_sched_port_queues_per_subport(port));
+	uint32_t tc_index = (qindex >> 2) & 0x3;
+	uint32_t pkt_len = pkt->pkt.pkt_len;
+
+	s->stats.n_pkts_tc_dropped[tc_index] += 1;
+	s->stats.n_bytes_tc_dropped[tc_index] += pkt_len;
+}
+
+static inline void
+rte_sched_port_update_queue_stats(struct rte_sched_port *port, uint32_t qindex, struct rte_mbuf *pkt)
+{
+	struct rte_sched_queue_extra *qe = port->queue_extra + qindex;
+	uint32_t pkt_len = pkt->pkt.pkt_len;
+
+	qe->stats.n_pkts += 1;
+	qe->stats.n_bytes += pkt_len;
+}
+
+static inline void
+rte_sched_port_update_queue_stats_on_drop(struct rte_sched_port *port, uint32_t qindex, struct rte_mbuf *pkt)
+{
+	struct rte_sched_queue_extra *qe = port->queue_extra + qindex;
+	uint32_t pkt_len = pkt->pkt.pkt_len;
+
+	qe->stats.n_pkts_dropped += 1;
+	qe->stats.n_bytes_dropped += pkt_len;
+}
+
+#endif /* RTE_SCHED_COLLECT_STATS */
+
+#ifdef RTE_SCHED_RED
+
+static inline int
+rte_sched_port_red_drop(struct rte_sched_port *port, struct rte_mbuf *pkt, uint32_t qindex, uint16_t qlen)
+{
+	struct rte_sched_queue_extra *qe;
+	struct rte_red_config *red_cfg;
+	struct rte_red *red;
+	uint32_t tc_index;
+	enum rte_meter_color color;
+
+	tc_index = (qindex >> 2) & 0x3;
+	color = rte_sched_port_pkt_read_color(pkt);
+	red_cfg = &port->red_config[tc_index][color];
+
+	qe = port->queue_extra + qindex;
+	red = &qe->red;
+
+	return rte_red_enqueue(red_cfg, red, qlen, port->time);
+}
+
+static inline void
+rte_sched_port_set_queue_empty_timestamp(struct rte_sched_port *port, uint32_t qindex)
+{
+	struct rte_sched_queue_extra *qe;
+	struct rte_red *red;
+
+	qe = port->queue_extra + qindex;
+	red = &qe->red;
+
+	rte_red_mark_queue_empty(red, port->time);
+}
+
+#else
+
+#define rte_sched_port_red_drop(port, pkt, qindex, qlen) 0
+
+#define rte_sched_port_set_queue_empty_timestamp(port, qindex)
+
+#endif /* RTE_SCHED_RED */
+
+#if RTE_SCHED_DEBUG
+
+static inline int
+debug_pipe_is_empty(struct rte_sched_port *port, uint32_t pindex)
+{
+	uint32_t qindex, i;
+
+	qindex = pindex << 4;
+
+	for (i = 0; i < 16; i ++){
+		uint32_t queue_empty = rte_sched_port_queue_is_empty(port, qindex + i);
+		uint32_t bmp_bit_clear = (rte_bitmap_get(&port->bmp, qindex + i) == 0);
+
+		if (queue_empty != bmp_bit_clear){
+			rte_panic("Queue status mismatch for queue %u of pipe %u\n", i, pindex);
%u\n", i, pindex); + } + + if (!queue_empty){ + return 0; + } + } + + return 1; +} + +static inline void +debug_check_queue_slab(struct rte_sched_port *port, uint32_t bmp_pos, uint64_t bmp_slab) +{ + uint64_t mask; + uint32_t i, panic; + + if (bmp_slab == 0){ + rte_panic("Empty slab at position %u\n", bmp_pos); + } + + panic = 0; + for (i = 0, mask = 1; i < 64; i ++, mask <<= 1) { + if (mask & bmp_slab){ + if (rte_sched_port_queue_is_empty(port, bmp_pos + i)) { + printf("Queue %u (slab offset %u) is empty\n", bmp_pos + i, i); + panic = 1; + } + } + } + + if (panic){ + rte_panic("Empty queues in slab 0x%" PRIx64 "starting at position %u\n", + bmp_slab, bmp_pos); + } +} + +#endif /* RTE_SCHED_DEBUG */ + +static inline uint32_t +rte_sched_port_enqueue_qptrs_prefetch0(struct rte_sched_port *port, struct rte_mbuf *pkt) +{ + struct rte_sched_queue *q; +#ifdef RTE_SCHED_COLLECT_STATS + struct rte_sched_queue_extra *qe; +#endif + uint32_t subport, pipe, traffic_class, queue, qindex; + + rte_sched_port_pkt_read_tree_path(pkt, &subport, &pipe, &traffic_class, &queue); + + qindex = rte_sched_port_qindex(port, subport, pipe, traffic_class, queue); + q = port->queue + qindex; + rte_prefetch0(q); +#ifdef RTE_SCHED_COLLECT_STATS + qe = port->queue_extra + qindex; + rte_prefetch0(qe); +#endif + + return qindex; +} + +static inline void +rte_sched_port_enqueue_qwa_prefetch0(struct rte_sched_port *port, uint32_t qindex, struct rte_mbuf **qbase) +{ + struct rte_sched_queue *q; + struct rte_mbuf **q_qw; + uint16_t qsize; + + q = port->queue + qindex; + qsize = rte_sched_port_qsize(port, qindex); + q_qw = qbase + (q->qw & (qsize - 1)); + + rte_prefetch0(q_qw); + rte_bitmap_prefetch0(&port->bmp, qindex); +} + +static inline int +rte_sched_port_enqueue_qwa(struct rte_sched_port *port, uint32_t qindex, struct rte_mbuf **qbase, struct rte_mbuf *pkt) +{ + struct rte_sched_queue *q; + uint16_t qsize; + uint16_t qlen; + + q = port->queue + qindex; + qsize = rte_sched_port_qsize(port, qindex); + qlen = q->qw - q->qr; + + /* Drop the packet (and update drop stats) when queue is full */ + if (unlikely(rte_sched_port_red_drop(port, pkt, qindex, qlen) || (qlen >= qsize))) { + rte_pktmbuf_free(pkt); +#ifdef RTE_SCHED_COLLECT_STATS + rte_sched_port_update_subport_stats_on_drop(port, qindex, pkt); + rte_sched_port_update_queue_stats_on_drop(port, qindex, pkt); +#endif + return 0; + } + + /* Enqueue packet */ + qbase[q->qw & (qsize - 1)] = pkt; + q->qw ++; + + /* Activate queue in the port bitmap */ + rte_bitmap_set(&port->bmp, qindex); + + /* Statistics */ +#ifdef RTE_SCHED_COLLECT_STATS + rte_sched_port_update_subport_stats(port, qindex, pkt); + rte_sched_port_update_queue_stats(port, qindex, pkt); +#endif + + return 1; +} + +#if RTE_SCHED_ENQUEUE == 0 + +int +rte_sched_port_enqueue(struct rte_sched_port *port, struct rte_mbuf **pkts, uint32_t n_pkts) +{ + uint32_t result, i; + + result = 0; + + for (i = 0; i < n_pkts; i ++) { + struct rte_mbuf *pkt; + struct rte_mbuf **q_base; + uint32_t subport, pipe, traffic_class, queue, qindex; + + pkt = pkts[i]; + + rte_sched_port_pkt_read_tree_path(pkt, &subport, &pipe, &traffic_class, &queue); + + qindex = rte_sched_port_qindex(port, subport, pipe, traffic_class, queue); + + q_base = rte_sched_port_qbase(port, qindex); + + result += rte_sched_port_enqueue_qwa(port, qindex, q_base, pkt); + } + + return result; +} + +#else + +/* The enqueue function implements a 4-level pipeline with each stage processing + * two different packets. 
The purpose of using a pipeline is to hide the latency
+ * of prefetching the data structures. The naming convention is presented in the
+ * diagram below:
+ *
+ *   p00  _______   p10  _______   p20  _______   p30  _______
+ * ----->|       |----->|       |----->|       |----->|       |----->
+ *       |   0   |      |   1   |      |   2   |      |   3   |
+ * ----->|_______|----->|_______|----->|_______|----->|_______|----->
+ *   p01            p11            p21            p31
+ *
+ ***/
+int
+rte_sched_port_enqueue(struct rte_sched_port *port, struct rte_mbuf **pkts, uint32_t n_pkts)
+{
+	struct rte_mbuf *pkt00, *pkt01, *pkt10, *pkt11, *pkt20, *pkt21, *pkt30, *pkt31, *pkt_last;
+	struct rte_mbuf **q00_base, **q01_base, **q10_base, **q11_base, **q20_base, **q21_base, **q30_base, **q31_base, **q_last_base;
+	uint32_t q00, q01, q10, q11, q20, q21, q30, q31, q_last;
+	uint32_t r00, r01, r10, r11, r20, r21, r30, r31, r_last;
+	uint32_t result, i;
+
+	result = 0;
+
+	/* Less than 6 input packets available, which is not enough to feed the pipeline */
+	if (unlikely(n_pkts < 6)) {
+		struct rte_mbuf **q_base[5];
+		uint32_t q[5];
+
+		/* Prefetch the mbuf structure of each packet */
+		for (i = 0; i < n_pkts; i ++) {
+			rte_prefetch0(pkts[i]);
+		}
+
+		/* Prefetch the queue structure for each queue */
+		for (i = 0; i < n_pkts; i ++) {
+			q[i] = rte_sched_port_enqueue_qptrs_prefetch0(port, pkts[i]);
+		}
+
+		/* Prefetch the write pointer location of each queue */
+		for (i = 0; i < n_pkts; i ++) {
+			q_base[i] = rte_sched_port_qbase(port, q[i]);
+			rte_sched_port_enqueue_qwa_prefetch0(port, q[i], q_base[i]);
+		}
+
+		/* Write each packet to its queue */
+		for (i = 0; i < n_pkts; i ++) {
+			result += rte_sched_port_enqueue_qwa(port, q[i], q_base[i], pkts[i]);
+		}
+
+		return result;
+	}
+
+	/* Feed the first 3 stages of the pipeline (6 packets needed) */
+	pkt20 = pkts[0];
+	pkt21 = pkts[1];
+	rte_prefetch0(pkt20);
+	rte_prefetch0(pkt21);
+
+	pkt10 = pkts[2];
+	pkt11 = pkts[3];
+	rte_prefetch0(pkt10);
+	rte_prefetch0(pkt11);
+
+	q20 = rte_sched_port_enqueue_qptrs_prefetch0(port, pkt20);
+	q21 = rte_sched_port_enqueue_qptrs_prefetch0(port, pkt21);
+
+	pkt00 = pkts[4];
+	pkt01 = pkts[5];
+	rte_prefetch0(pkt00);
+	rte_prefetch0(pkt01);
+
+	q10 = rte_sched_port_enqueue_qptrs_prefetch0(port, pkt10);
+	q11 = rte_sched_port_enqueue_qptrs_prefetch0(port, pkt11);
+
+	q20_base = rte_sched_port_qbase(port, q20);
+	q21_base = rte_sched_port_qbase(port, q21);
+	rte_sched_port_enqueue_qwa_prefetch0(port, q20, q20_base);
+	rte_sched_port_enqueue_qwa_prefetch0(port, q21, q21_base);
+
+	/* Run the pipeline */
+	for (i = 6; i < (n_pkts & (~1)); i += 2) {
+		/* Propagate stage inputs */
+		pkt30 = pkt20;
+		pkt31 = pkt21;
+		pkt20 = pkt10;
+		pkt21 = pkt11;
+		pkt10 = pkt00;
+		pkt11 = pkt01;
+		q30 = q20;
+		q31 = q21;
+		q20 = q10;
+		q21 = q11;
+		q30_base = q20_base;
+		q31_base = q21_base;
+
+		/* Stage 0: Get packets in */
+		pkt00 = pkts[i];
+		pkt01 = pkts[i + 1];
+		rte_prefetch0(pkt00);
+		rte_prefetch0(pkt01);
+
+		/* Stage 1: Prefetch queue structure storing queue pointers */
+		q10 = rte_sched_port_enqueue_qptrs_prefetch0(port, pkt10);
+		q11 = rte_sched_port_enqueue_qptrs_prefetch0(port, pkt11);
+
+		/* Stage 2: Prefetch queue write location */
+		q20_base = rte_sched_port_qbase(port, q20);
+		q21_base = rte_sched_port_qbase(port, q21);
+		rte_sched_port_enqueue_qwa_prefetch0(port, q20, q20_base);
+		rte_sched_port_enqueue_qwa_prefetch0(port, q21, q21_base);
+
+		/* Stage 3: Write packet to queue and activate queue */
+		r30 = rte_sched_port_enqueue_qwa(port, q30, q30_base, pkt30);
+		r31 = rte_sched_port_enqueue_qwa(port, q31, q31_base, pkt31);
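+		/* r30/r31 are 1 if the packet was enqueued and 0 if it was dropped
+		 * (queue full or RED drop), so result counts the accepted packets */
+		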
result += r30 + r31; + } + + /* Drain the pipeline (exactly 6 packets). Handle the last packet in the case + of an odd number of input packets. */ + pkt_last = pkts[n_pkts - 1]; + rte_prefetch0(pkt_last); + + q00 = rte_sched_port_enqueue_qptrs_prefetch0(port, pkt00); + q01 = rte_sched_port_enqueue_qptrs_prefetch0(port, pkt01); + + q10_base = rte_sched_port_qbase(port, q10); + q11_base = rte_sched_port_qbase(port, q11); + rte_sched_port_enqueue_qwa_prefetch0(port, q10, q10_base); + rte_sched_port_enqueue_qwa_prefetch0(port, q11, q11_base); + + r20 = rte_sched_port_enqueue_qwa(port, q20, q20_base, pkt20); + r21 = rte_sched_port_enqueue_qwa(port, q21, q21_base, pkt21); + result += r20 + r21; + + q_last = rte_sched_port_enqueue_qptrs_prefetch0(port, pkt_last); + + q00_base = rte_sched_port_qbase(port, q00); + q01_base = rte_sched_port_qbase(port, q01); + rte_sched_port_enqueue_qwa_prefetch0(port, q00, q00_base); + rte_sched_port_enqueue_qwa_prefetch0(port, q01, q01_base); + + r10 = rte_sched_port_enqueue_qwa(port, q10, q10_base, pkt10); + r11 = rte_sched_port_enqueue_qwa(port, q11, q11_base, pkt11); + result += r10 + r11; + + q_last_base = rte_sched_port_qbase(port, q_last); + rte_sched_port_enqueue_qwa_prefetch0(port, q_last, q_last_base); + + r00 = rte_sched_port_enqueue_qwa(port, q00, q00_base, pkt00); + r01 = rte_sched_port_enqueue_qwa(port, q01, q01_base, pkt01); + result += r00 + r01; + + if (n_pkts & 1) { + r_last = rte_sched_port_enqueue_qwa(port, q_last, q_last_base, pkt_last); + result += r_last; + } + + return result; +} + +#endif /* RTE_SCHED_ENQUEUE */ + +#if RTE_SCHED_TS_CREDITS_UPDATE == 0 + +#define grinder_credits_update(port, pos) + +#elif !defined(RTE_SCHED_SUBPORT_TC_OV) + +static inline void +grinder_credits_update(struct rte_sched_port *port, uint32_t pos) +{ + struct rte_sched_grinder *grinder = port->grinder + pos; + struct rte_sched_subport *subport = grinder->subport; + struct rte_sched_pipe *pipe = grinder->pipe; + struct rte_sched_pipe_profile *params = grinder->pipe_params; + uint64_t n_periods; + + /* Subport TB */ + n_periods = (port->time - subport->tb_time) / subport->tb_period; + subport->tb_credits += n_periods * subport->tb_credits_per_period; + subport->tb_credits = rte_sched_min_val_2_u32(subport->tb_credits, subport->tb_size); + subport->tb_time += n_periods * subport->tb_period; + + /* Pipe TB */ + n_periods = (port->time - pipe->tb_time) / params->tb_period; + pipe->tb_credits += n_periods * params->tb_credits_per_period; + pipe->tb_credits = rte_sched_min_val_2_u32(pipe->tb_credits, params->tb_size); + pipe->tb_time += n_periods * params->tb_period; + + /* Subport TCs */ + if (unlikely(port->time >= subport->tc_time)) { + subport->tc_credits[0] = subport->tc_credits_per_period[0]; + subport->tc_credits[1] = subport->tc_credits_per_period[1]; + subport->tc_credits[2] = subport->tc_credits_per_period[2]; + subport->tc_credits[3] = subport->tc_credits_per_period[3]; + subport->tc_time = port->time + subport->tc_period; + } + + /* Pipe TCs */ + if (unlikely(port->time >= pipe->tc_time)) { + pipe->tc_credits[0] = params->tc_credits_per_period[0]; + pipe->tc_credits[1] = params->tc_credits_per_period[1]; + pipe->tc_credits[2] = params->tc_credits_per_period[2]; + pipe->tc_credits[3] = params->tc_credits_per_period[3]; + pipe->tc_time = port->time + params->tc_period; + } +} + +#else + +static inline void +grinder_credits_update(struct rte_sched_port *port, uint32_t pos) +{ + struct rte_sched_grinder *grinder = port->grinder + pos; + struct rte_sched_subport 
*subport = grinder->subport; + struct rte_sched_pipe *pipe = grinder->pipe; + struct rte_sched_pipe_profile *params = grinder->pipe_params; + uint64_t n_periods; + + /* Subport TB */ + n_periods = (port->time - subport->tb_time) / subport->tb_period; + subport->tb_credits += n_periods * subport->tb_credits_per_period; + subport->tb_credits = rte_sched_min_val_2_u32(subport->tb_credits, subport->tb_size); + subport->tb_time += n_periods * subport->tb_period; + + /* Pipe TB */ + n_periods = (port->time - pipe->tb_time) / params->tb_period; + pipe->tb_credits += n_periods * params->tb_credits_per_period; + pipe->tb_credits = rte_sched_min_val_2_u32(pipe->tb_credits, params->tb_size); + pipe->tb_time += n_periods * params->tb_period; + + /* Subport TCs */ + if (unlikely(port->time >= subport->tc_ov_time)) { + uint64_t n_ov_periods; + + if (unlikely(port->time >= subport->tc_time)) { + subport->tc_credits[0] = subport->tc_credits_per_period[0]; + subport->tc_credits[1] = subport->tc_credits_per_period[1]; + subport->tc_credits[2] = subport->tc_credits_per_period[2]; + subport->tc_credits[3] = subport->tc_credits_per_period[3]; + + subport->tc_time = port->time + subport->tc_period; + } + + n_ov_periods = (subport->tc_time - port->time + subport->tc_ov_period - 1) / subport->tc_ov_period; + + subport->tc_ov_credits[0] = subport->tc_credits[0] / (n_ov_periods * subport->tc_ov_n[0]); + subport->tc_ov_credits[1] = subport->tc_credits[1] / (n_ov_periods * subport->tc_ov_n[1]); + subport->tc_ov_credits[2] = subport->tc_credits[2] / (n_ov_periods * subport->tc_ov_n[2]); + subport->tc_ov_credits[3] = subport->tc_credits[3] / (n_ov_periods * subport->tc_ov_n[3]); + + subport->tc_ov_time = port->time + subport->tc_ov_period; + subport->tc_ov_period_id ++; + } + + /* Pipe TCs */ + if (unlikely(port->time >= pipe->tc_time)) { + pipe->tc_credits[0] = params->tc_credits_per_period[0]; + pipe->tc_credits[1] = params->tc_credits_per_period[1]; + pipe->tc_credits[2] = params->tc_credits_per_period[2]; + pipe->tc_credits[3] = params->tc_credits_per_period[3]; + pipe->tc_time = port->time + params->tc_period; + } + if (unlikely(pipe->tc_ov_period_id != subport->tc_ov_period_id)) { + uint32_t pipe_tc_ov_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; + uint32_t tc_mask[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; + uint32_t mask[] = {UINT32_MAX, 0}; + + tc_mask[0] = mask[subport->tc_ov[0]]; + tc_mask[1] = mask[subport->tc_ov[1]]; + tc_mask[2] = mask[subport->tc_ov[2]]; + tc_mask[3] = mask[subport->tc_ov[3]]; + + pipe_tc_ov_credits[0] = subport->tc_ov_credits[0] * params->tc_ov_weight[0]; + pipe_tc_ov_credits[1] = subport->tc_ov_credits[1] * params->tc_ov_weight[1]; + pipe_tc_ov_credits[2] = subport->tc_ov_credits[2] * params->tc_ov_weight[2]; + pipe_tc_ov_credits[3] = subport->tc_ov_credits[3] * params->tc_ov_weight[3]; + + pipe->tc_ov_credits[0] = (tc_mask[0] & pipe->tc_credits[0]) | ((~ tc_mask[0]) & pipe_tc_ov_credits[0]); + pipe->tc_ov_credits[1] = (tc_mask[1] & pipe->tc_credits[1]) | ((~ tc_mask[1]) & pipe_tc_ov_credits[1]); + pipe->tc_ov_credits[2] = (tc_mask[2] & pipe->tc_credits[2]) | ((~ tc_mask[2]) & pipe_tc_ov_credits[2]); + pipe->tc_ov_credits[3] = (tc_mask[3] & pipe->tc_credits[3]) | ((~ tc_mask[3]) & pipe_tc_ov_credits[3]); + + pipe->tc_ov_period_id = subport->tc_ov_period_id; + } +} + +#endif /* RTE_SCHED_TS_CREDITS_UPDATE, RTE_SCHED_SUBPORT_TC_OV */ + +#ifndef RTE_SCHED_SUBPORT_TC_OV + +static inline int +grinder_credits_check(struct rte_sched_port *port, uint32_t pos) +{ + struct rte_sched_grinder *grinder = 
port->grinder + pos; + struct rte_sched_subport *subport = grinder->subport; + struct rte_sched_pipe *pipe = grinder->pipe; + struct rte_mbuf *pkt = grinder->pkt; + uint32_t tc_index = grinder->tc_index; + uint32_t pkt_len = pkt->pkt.pkt_len + port->frame_overhead; + int enough_credits; + + /* Check queue credits */ + enough_credits = (pkt_len <= subport->tb_credits) && + (pkt_len <= subport->tc_credits[tc_index]) && + (pkt_len <= pipe->tb_credits) && + (pkt_len <= pipe->tc_credits[tc_index]); + + if (!enough_credits) { + return 0; + } + + /* Update port credits */ + subport->tb_credits -= pkt_len; + subport->tc_credits[tc_index] -= pkt_len; + pipe->tb_credits -= pkt_len; + pipe->tc_credits[tc_index] -= pkt_len; + + return 1; +} + +#else + +static inline int +grinder_credits_check(struct rte_sched_port *port, uint32_t pos) +{ + struct rte_sched_grinder *grinder = port->grinder + pos; + struct rte_sched_subport *subport = grinder->subport; + struct rte_sched_pipe *pipe = grinder->pipe; + struct rte_mbuf *pkt = grinder->pkt; + uint32_t tc_index = grinder->tc_index; + uint32_t pkt_len = pkt->pkt.pkt_len + port->frame_overhead; + uint32_t subport_tb_credits = subport->tb_credits; + uint32_t subport_tc_credits = subport->tc_credits[tc_index]; + uint32_t pipe_tb_credits = pipe->tb_credits; + uint32_t pipe_tc_credits = pipe->tc_credits[tc_index]; + uint32_t pipe_tc_ov_credits = pipe->tc_ov_credits[tc_index]; + int enough_credits; + + /* Check pipe and subport credits */ + enough_credits = (pkt_len <= subport_tb_credits) && + (pkt_len <= subport_tc_credits) && + (pkt_len <= pipe_tb_credits) && + (pkt_len <= pipe_tc_credits) && + (pkt_len <= pipe_tc_ov_credits); + + if (!enough_credits) { + return 0; + } + + /* Update pipe and subport credits */ + subport->tb_credits -= pkt_len; + subport->tc_credits[tc_index] -= pkt_len; + pipe->tb_credits -= pkt_len; + pipe->tc_credits[tc_index] -= pkt_len; + pipe->tc_ov_credits[tc_index] -= pkt_len; + + return 1; +} + +#endif /* RTE_SCHED_SUBPORT_TC_OV */ + +static inline int +grinder_schedule(struct rte_sched_port *port, uint32_t pos) +{ + struct rte_sched_grinder *grinder = port->grinder + pos; + struct rte_sched_queue *queue = grinder->queue[grinder->qpos]; + struct rte_mbuf *pkt = grinder->pkt; + uint32_t pkt_len = pkt->pkt.pkt_len + port->frame_overhead; + +#if RTE_SCHED_TS_CREDITS_CHECK + if (!grinder_credits_check(port, pos)) { + return 0; + } +#endif + + /* Advance port time */ + port->time += pkt_len; + + /* Send packet */ + port->pkts_out[port->n_pkts_out ++] = pkt; + queue->qr ++; + grinder->wrr_tokens[grinder->qpos] += pkt_len * grinder->wrr_cost[grinder->qpos]; + if (queue->qr == queue->qw) { + uint32_t qindex = grinder->qindex[grinder->qpos]; + + rte_bitmap_clear(&port->bmp, qindex); + grinder->qmask &= ~(1 << grinder->qpos); + grinder->wrr_mask[grinder->qpos] = 0; + rte_sched_port_set_queue_empty_timestamp(port, qindex); + } + + /* Reset pipe loop detection */ + port->pipe_loop = RTE_SCHED_PIPE_INVALID; + grinder->productive = 1; + + return 1; +} + +#if RTE_SCHED_OPTIMIZATIONS + +static inline int +grinder_pipe_exists(struct rte_sched_port *port, uint32_t base_pipe) +{ + __m128i index = _mm_set1_epi32 (base_pipe); + __m128i pipes = _mm_load_si128((__m128i *)port->grinder_base_bmp_pos); + __m128i res = _mm_cmpeq_epi32(pipes, index); + pipes = _mm_load_si128((__m128i *)(port->grinder_base_bmp_pos + 4)); + pipes = _mm_cmpeq_epi32(pipes, index); + res = _mm_or_si128(res, pipes); + + if (_mm_testz_si128(res, res)) + return 0; + + return 1; +} + +#else 
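+
+/* Scalar fallback: a linear scan over the RTE_SCHED_PORT_N_GRINDERS grinder
+ * base positions. The SSE4.1 variant above performs the same membership check
+ * with two 128-bit compares instead of a loop. */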
+ +static inline int +grinder_pipe_exists(struct rte_sched_port *port, uint32_t base_pipe) +{ + uint32_t i; + + for (i = 0; i < RTE_SCHED_PORT_N_GRINDERS; i ++) { + if (port->grinder_base_bmp_pos[i] == base_pipe) { + return 1; + } + } + + return 0; +} + +#endif /* RTE_SCHED_OPTIMIZATIONS */ + +static inline void +grinder_pcache_populate(struct rte_sched_port *port, uint32_t pos, uint32_t bmp_pos, uint64_t bmp_slab) +{ + struct rte_sched_grinder *grinder = port->grinder + pos; + uint16_t w[4]; + + grinder->pcache_w = 0; + grinder->pcache_r = 0; + + w[0] = (uint16_t) bmp_slab; + w[1] = (uint16_t) (bmp_slab >> 16); + w[2] = (uint16_t) (bmp_slab >> 32); + w[3] = (uint16_t) (bmp_slab >> 48); + + grinder->pcache_qmask[grinder->pcache_w] = w[0]; + grinder->pcache_qindex[grinder->pcache_w] = bmp_pos; + grinder->pcache_w += (w[0] != 0); + + grinder->pcache_qmask[grinder->pcache_w] = w[1]; + grinder->pcache_qindex[grinder->pcache_w] = bmp_pos + 16; + grinder->pcache_w += (w[1] != 0); + + grinder->pcache_qmask[grinder->pcache_w] = w[2]; + grinder->pcache_qindex[grinder->pcache_w] = bmp_pos + 32; + grinder->pcache_w += (w[2] != 0); + + grinder->pcache_qmask[grinder->pcache_w] = w[3]; + grinder->pcache_qindex[grinder->pcache_w] = bmp_pos + 48; + grinder->pcache_w += (w[3] != 0); +} + +static inline void +grinder_tccache_populate(struct rte_sched_port *port, uint32_t pos, uint32_t qindex, uint16_t qmask) +{ + struct rte_sched_grinder *grinder = port->grinder + pos; + uint8_t b[4]; + + grinder->tccache_w = 0; + grinder->tccache_r = 0; + + b[0] = (uint8_t) (qmask & 0xF); + b[1] = (uint8_t) ((qmask >> 4) & 0xF); + b[2] = (uint8_t) ((qmask >> 8) & 0xF); + b[3] = (uint8_t) ((qmask >> 12) & 0xF); + + grinder->tccache_qmask[grinder->tccache_w] = b[0]; + grinder->tccache_qindex[grinder->tccache_w] = qindex; + grinder->tccache_w += (b[0] != 0); + + grinder->tccache_qmask[grinder->tccache_w] = b[1]; + grinder->tccache_qindex[grinder->tccache_w] = qindex + 4; + grinder->tccache_w += (b[1] != 0); + + grinder->tccache_qmask[grinder->tccache_w] = b[2]; + grinder->tccache_qindex[grinder->tccache_w] = qindex + 8; + grinder->tccache_w += (b[2] != 0); + + grinder->tccache_qmask[grinder->tccache_w] = b[3]; + grinder->tccache_qindex[grinder->tccache_w] = qindex + 12; + grinder->tccache_w += (b[3] != 0); +} + +static inline int +grinder_next_tc(struct rte_sched_port *port, uint32_t pos) +{ + struct rte_sched_grinder *grinder = port->grinder + pos; + struct rte_mbuf **qbase; + uint32_t qindex; + uint16_t qsize; + + if (grinder->tccache_r == grinder->tccache_w) { + return 0; + } + + qindex = grinder->tccache_qindex[grinder->tccache_r]; + qbase = rte_sched_port_qbase(port, qindex); + qsize = rte_sched_port_qsize(port, qindex); + + grinder->tc_index = (qindex >> 2) & 0x3; + grinder->qmask = grinder->tccache_qmask[grinder->tccache_r]; + grinder->qsize = qsize; + + grinder->qindex[0] = qindex; + grinder->qindex[1] = qindex + 1; + grinder->qindex[2] = qindex + 2; + grinder->qindex[3] = qindex + 3; + + grinder->queue[0] = port->queue + qindex; + grinder->queue[1] = port->queue + qindex + 1; + grinder->queue[2] = port->queue + qindex + 2; + grinder->queue[3] = port->queue + qindex + 3; + + grinder->qbase[0] = qbase; + grinder->qbase[1] = qbase + qsize; + grinder->qbase[2] = qbase + 2 * qsize; + grinder->qbase[3] = qbase + 3 * qsize; + + grinder->tccache_r ++; + return 1; +} + +static inline int +grinder_next_pipe(struct rte_sched_port *port, uint32_t pos) +{ + struct rte_sched_grinder *grinder = port->grinder + pos; + uint32_t 
+
+static inline int
+grinder_next_pipe(struct rte_sched_port *port, uint32_t pos)
+{
+	struct rte_sched_grinder *grinder = port->grinder + pos;
+	uint32_t pipe_qindex;
+	uint16_t pipe_qmask;
+
+	if (grinder->pcache_r < grinder->pcache_w) {
+		pipe_qmask = grinder->pcache_qmask[grinder->pcache_r];
+		pipe_qindex = grinder->pcache_qindex[grinder->pcache_r];
+		grinder->pcache_r ++;
+	} else {
+		uint64_t bmp_slab = 0;
+		uint32_t bmp_pos = 0;
+
+		/* Get another non-empty pipe group */
+		if (unlikely(rte_bitmap_scan(&port->bmp, &bmp_pos, &bmp_slab) <= 0)) {
+			return 0;
+		}
+
+#if RTE_SCHED_DEBUG
+		debug_check_queue_slab(port, bmp_pos, bmp_slab);
+#endif
+
+		/* Return if pipe group already in one of the other grinders */
+		port->grinder_base_bmp_pos[pos] = RTE_SCHED_BMP_POS_INVALID;
+		if (unlikely(grinder_pipe_exists(port, bmp_pos))) {
+			return 0;
+		}
+		port->grinder_base_bmp_pos[pos] = bmp_pos;
+
+		/* Install new pipe group into grinder's pipe cache */
+		grinder_pcache_populate(port, pos, bmp_pos, bmp_slab);
+
+		pipe_qmask = grinder->pcache_qmask[0];
+		pipe_qindex = grinder->pcache_qindex[0];
+		grinder->pcache_r = 1;
+	}
+
+	/* Install new pipe in the grinder */
+	grinder->pindex = pipe_qindex >> 4;
+	grinder->subport = port->subport + (grinder->pindex / port->n_pipes_per_subport);
+	grinder->pipe = port->pipe + grinder->pindex;
+	grinder->pipe_params = NULL; /* to be set after the pipe structure is prefetched */
+	grinder->productive = 0;
+
+	grinder_tccache_populate(port, pos, pipe_qindex, pipe_qmask);
+	grinder_next_tc(port, pos);
+
+	/* Check for pipe exhaustion */
+	if (grinder->pindex == port->pipe_loop) {
+		port->pipe_exhaustion = 1;
+		port->pipe_loop = RTE_SCHED_PIPE_INVALID;
+	}
+
+	return 1;
+}
+
+#if RTE_SCHED_WRR == 0
+
+#define grinder_wrr_load(a,b)
+
+#define grinder_wrr_store(a,b)
+
+static inline void
+grinder_wrr(struct rte_sched_port *port, uint32_t pos)
+{
+	struct rte_sched_grinder *grinder = port->grinder + pos;
+	uint64_t slab = grinder->qmask;
+
+	if (rte_bsf64(slab, &grinder->qpos) == 0) {
+		rte_panic("grinder wrr\n");
+	}
+}
+
+#elif RTE_SCHED_WRR == 1
+
+static inline void
+grinder_wrr_load(struct rte_sched_port *port, uint32_t pos)
+{
+	struct rte_sched_grinder *grinder = port->grinder + pos;
+	struct rte_sched_pipe *pipe = grinder->pipe;
+	struct rte_sched_pipe_profile *pipe_params = grinder->pipe_params;
+	uint32_t tc_index = grinder->tc_index;
+	uint32_t qmask = grinder->qmask;
+	uint32_t qindex;
+
+	qindex = tc_index * 4;
+
+	grinder->wrr_tokens[0] = ((uint16_t) pipe->wrr_tokens[qindex]) << RTE_SCHED_WRR_SHIFT;
+	grinder->wrr_tokens[1] = ((uint16_t) pipe->wrr_tokens[qindex + 1]) << RTE_SCHED_WRR_SHIFT;
+	grinder->wrr_tokens[2] = ((uint16_t) pipe->wrr_tokens[qindex + 2]) << RTE_SCHED_WRR_SHIFT;
+	grinder->wrr_tokens[3] = ((uint16_t) pipe->wrr_tokens[qindex + 3]) << RTE_SCHED_WRR_SHIFT;
+
+	grinder->wrr_mask[0] = (qmask & 0x1) * 0xFFFF;
+	grinder->wrr_mask[1] = ((qmask >> 1) & 0x1) * 0xFFFF;
+	grinder->wrr_mask[2] = ((qmask >> 2) & 0x1) * 0xFFFF;
+	grinder->wrr_mask[3] = ((qmask >> 3) & 0x1) * 0xFFFF;
+
+	grinder->wrr_cost[0] = pipe_params->wrr_cost[qindex];
+	grinder->wrr_cost[1] = pipe_params->wrr_cost[qindex + 1];
+	grinder->wrr_cost[2] = pipe_params->wrr_cost[qindex + 2];
+	grinder->wrr_cost[3] = pipe_params->wrr_cost[qindex + 3];
+}
+
+static inline void
+grinder_wrr_store(struct rte_sched_port *port, uint32_t pos)
+{
+	struct rte_sched_grinder *grinder = port->grinder + pos;
+	struct rte_sched_pipe *pipe = grinder->pipe;
+	uint32_t tc_index = grinder->tc_index;
+	uint32_t qindex;
+
+	qindex = tc_index * 4;
+
+	pipe->wrr_tokens[qindex] = (uint8_t) ((grinder->wrr_tokens[0] & grinder->wrr_mask[0]) >> RTE_SCHED_WRR_SHIFT);
+	pipe->wrr_tokens[qindex + 1] = (uint8_t) ((grinder->wrr_tokens[1] & grinder->wrr_mask[1]) >> RTE_SCHED_WRR_SHIFT);
+	pipe->wrr_tokens[qindex + 2] = (uint8_t) ((grinder->wrr_tokens[2] & grinder->wrr_mask[2]) >> RTE_SCHED_WRR_SHIFT);
+	pipe->wrr_tokens[qindex + 3] = (uint8_t) ((grinder->wrr_tokens[3] & grinder->wrr_mask[3]) >> RTE_SCHED_WRR_SHIFT);
+}
+
+static inline void
+grinder_wrr(struct rte_sched_port *port, uint32_t pos)
+{
+	struct rte_sched_grinder *grinder = port->grinder + pos;
+	uint16_t wrr_tokens_min;
+
+	grinder->wrr_tokens[0] |= ~grinder->wrr_mask[0];
+	grinder->wrr_tokens[1] |= ~grinder->wrr_mask[1];
+	grinder->wrr_tokens[2] |= ~grinder->wrr_mask[2];
+	grinder->wrr_tokens[3] |= ~grinder->wrr_mask[3];
+
+	grinder->qpos = rte_min_pos_4_u16(grinder->wrr_tokens);
+	wrr_tokens_min = grinder->wrr_tokens[grinder->qpos];
+
+	grinder->wrr_tokens[0] -= wrr_tokens_min;
+	grinder->wrr_tokens[1] -= wrr_tokens_min;
+	grinder->wrr_tokens[2] -= wrr_tokens_min;
+	grinder->wrr_tokens[3] -= wrr_tokens_min;
+}
+
+#else
+
+#error Invalid value for RTE_SCHED_WRR
+
+#endif /* RTE_SCHED_WRR */
+
+#define grinder_evict(port, pos)
+
+static inline void
+grinder_prefetch_pipe(struct rte_sched_port *port, uint32_t pos)
+{
+	struct rte_sched_grinder *grinder = port->grinder + pos;
+
+	rte_prefetch0(grinder->pipe);
+	rte_prefetch0(grinder->queue[0]);
+}
+
+static inline void
+grinder_prefetch_tc_queue_arrays(struct rte_sched_port *port, uint32_t pos)
+{
+	struct rte_sched_grinder *grinder = port->grinder + pos;
+	uint16_t qsize, qr[4];
+
+	qsize = grinder->qsize;
+	qr[0] = grinder->queue[0]->qr & (qsize - 1);
+	qr[1] = grinder->queue[1]->qr & (qsize - 1);
+	qr[2] = grinder->queue[2]->qr & (qsize - 1);
+	qr[3] = grinder->queue[3]->qr & (qsize - 1);
+
+	rte_prefetch0(grinder->qbase[0] + qr[0]);
+	rte_prefetch0(grinder->qbase[1] + qr[1]);
+
+	grinder_wrr_load(port, pos);
+	grinder_wrr(port, pos);
+
+	rte_prefetch0(grinder->qbase[2] + qr[2]);
+	rte_prefetch0(grinder->qbase[3] + qr[3]);
+}
+
+static inline void
+grinder_prefetch_mbuf(struct rte_sched_port *port, uint32_t pos)
+{
+	struct rte_sched_grinder *grinder = port->grinder + pos;
+	uint32_t qpos = grinder->qpos;
+	struct rte_mbuf **qbase = grinder->qbase[qpos];
+	uint16_t qsize = grinder->qsize;
+	uint16_t qr = grinder->queue[qpos]->qr & (qsize - 1);
+
+	grinder->pkt = qbase[qr];
+	rte_prefetch0(grinder->pkt);
+
+	if (unlikely((qr & 0x7) == 7)) {
+		uint16_t qr_next = (grinder->queue[qpos]->qr + 1) & (qsize - 1);
+
+		rte_prefetch0(qbase + qr_next);
+	}
+}
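/* [Editor's illustration, not part of the patch] One grinder_wrr() step on
 * hypothetical token values: the queue with the fewest tokens wins, and the
 * winner's token count is subtracted from all four lanes. Inactive (masked
 * out) queues are saturated to 0xFFFF so they never win. Relies only on
 * rte_min_pos_4_u16() from rte_sched_common.h; example_wrr_pick() is not
 * part of the library. */
static inline uint32_t
example_wrr_pick(uint16_t tokens[4], uint16_t mask[4])
{
	uint32_t i, qpos;
	uint16_t min;

	for (i = 0; i < 4; i ++)
		tokens[i] |= (uint16_t) ~mask[i];  /* inactive lane -> 0xFFFF */

	/* e.g. tokens {40, 10, 70, 0xFFFF} -> winner is queue 1 */
	qpos = rte_min_pos_4_u16(tokens);
	min = tokens[qpos];
	for (i = 0; i < 4; i ++)
		tokens[i] -= min;                  /* winner rebased to zero */

	/* grinder_schedule() later adds pkt_len * wrr_cost to the winner */
	return qpos;
}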
+
+static inline uint32_t
+grinder_handle(struct rte_sched_port *port, uint32_t pos)
+{
+	struct rte_sched_grinder *grinder = port->grinder + pos;
+
+	switch (grinder->state) {
+	case e_GRINDER_PREFETCH_PIPE:
+	{
+		if (grinder_next_pipe(port, pos)) {
+			grinder_prefetch_pipe(port, pos);
+			port->busy_grinders ++;
+
+			grinder->state = e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS;
+			return 0;
+		}
+
+		return 0;
+	}
+
+	case e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS:
+	{
+		struct rte_sched_pipe *pipe = grinder->pipe;
+
+		grinder->pipe_params = port->pipe_profiles + pipe->profile;
+		grinder_prefetch_tc_queue_arrays(port, pos);
+		grinder_credits_update(port, pos);
+
+		grinder->state = e_GRINDER_PREFETCH_MBUF;
+		return 0;
+	}
+
+	case e_GRINDER_PREFETCH_MBUF:
+	{
+		grinder_prefetch_mbuf(port, pos);
+
+		grinder->state = e_GRINDER_READ_MBUF;
+		return 0;
+	}
+
+	case e_GRINDER_READ_MBUF:
+	{
+		uint32_t result = 0;
+
+		result = grinder_schedule(port, pos);
+
+		/* Look for next packet within the same TC */
+		if (result && grinder->qmask) {
+			grinder_wrr(port, pos);
+			grinder_prefetch_mbuf(port, pos);
+
+			return 1;
+		}
+		grinder_wrr_store(port, pos);
+
+		/* Look for another active TC within same pipe */
+		if (grinder_next_tc(port, pos)) {
+			grinder_prefetch_tc_queue_arrays(port, pos);
+
+			grinder->state = e_GRINDER_PREFETCH_MBUF;
+			return result;
+		}
+		if ((grinder->productive == 0) && (port->pipe_loop == RTE_SCHED_PIPE_INVALID)) {
+			port->pipe_loop = grinder->pindex;
+		}
+		grinder_evict(port, pos);
+
+		/* Look for another active pipe */
+		if (grinder_next_pipe(port, pos)) {
+			grinder_prefetch_pipe(port, pos);
+
+			grinder->state = e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS;
+			return result;
+		}
+
+		/* No active pipe found */
+		port->busy_grinders --;
+
+		grinder->state = e_GRINDER_PREFETCH_PIPE;
+		return result;
+	}
+
+	default:
+		rte_panic("Algorithmic error (invalid state)\n");
+		return 0;
+	}
+}
+
+static inline void
+rte_sched_port_time_resync(struct rte_sched_port *port)
+{
+	uint64_t cycles = rte_get_tsc_cycles();
+	uint64_t cycles_diff = cycles - port->time_cpu_cycles;
+	double bytes_diff = ((double) cycles_diff) / port->cycles_per_byte;
+
+	/* Advance port time */
+	port->time_cpu_cycles = cycles;
+	port->time_cpu_bytes += (uint64_t) bytes_diff;
+	if (port->time < port->time_cpu_bytes) {
+		port->time = port->time_cpu_bytes;
+	}
+
+	/* Reset pipe loop detection */
+	port->pipe_loop = RTE_SCHED_PIPE_INVALID;
+}
+
+static inline int
+rte_sched_port_exceptions(struct rte_sched_port *port)
+{
+	int exceptions;
+
+	/* Check if any exception flag is set */
+	exceptions = (port->busy_grinders == 0) ||
+		(port->pipe_exhaustion == 1);
+
+	/* Clear exception flags */
+	port->pipe_exhaustion = 0;
+
+	return exceptions;
+}
+
+int
+rte_sched_port_dequeue(struct rte_sched_port *port, struct rte_mbuf **pkts, uint32_t n_pkts)
+{
+	uint32_t i, count;
+
+	port->pkts_out = pkts;
+	port->n_pkts_out = 0;
+
+	rte_sched_port_time_resync(port);
+
+	/* Take each queue in the grinder one step further */
+	for (i = 0, count = 0; ; i ++) {
+		count += grinder_handle(port, i & (RTE_SCHED_PORT_N_GRINDERS - 1));
+		if ((count == n_pkts) || rte_sched_port_exceptions(port)) {
+			break;
+		}
+	}
+
+	return count;
+}
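Editor's note: the cycles-to-bytes conversion in rte_sched_port_time_resync() above is easier to see with concrete numbers. The sketch below is illustrative only; the 2.5 GHz TSC frequency and the 1.25 GB/s port rate (10 GbE) are assumed example values, and cycles_per_byte is precomputed the same way a port configuration would derive it.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	double tsc_hz = 2.5e9;         /* assumed CPU timestamp counter frequency */
	double rate = 1.25e9;          /* assumed port rate, bytes per second (10 GbE) */
	double cycles_per_byte = tsc_hz / rate;        /* = 2.0 cycles per byte-time */

	uint64_t cycles_diff = 10000;  /* TSC cycles elapsed since the last resync */
	uint64_t bytes_diff = (uint64_t) (((double) cycles_diff) / cycles_per_byte);

	/* 10000 cycles / 2 cycles-per-byte = 5000 byte-times: the port may
	 * advance its time reference by up to 5000 bytes */
	printf("%u byte-times elapsed\n", (unsigned) bytes_diff);
	return 0;
}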
diff --git a/lib/librte_sched/rte_sched.h b/lib/librte_sched/rte_sched.h
new file mode 100644
index 0000000000..7b49248b59
--- /dev/null
+++ b/lib/librte_sched/rte_sched.h
@@ -0,0 +1,446 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ *   * Neither the name of Intel Corporation nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef __INCLUDE_RTE_SCHED_H__
+#define __INCLUDE_RTE_SCHED_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Hierarchical Scheduler
+ *
+ * The hierarchical scheduler prioritizes the transmission of packets from different
+ * users and traffic classes according to the Service Level Agreements (SLAs) defined
+ * for the current network node.
+ *
+ * The scheduler supports thousands of packet queues grouped under a 5-level hierarchy:
+ *     1. Port:
+ *         - Typical usage: output Ethernet port;
+ *         - Multiple ports are scheduled in round robin order with equal priority;
+ *     2. Subport:
+ *         - Typical usage: group of users;
+ *         - Traffic shaping using the token bucket algorithm (one bucket per subport);
+ *         - Upper limit enforced per traffic class at subport level;
+ *         - Lower priority traffic classes able to reuse subport bandwidth currently
+ *           unused by higher priority traffic classes of the same subport;
+ *         - When any subport traffic class is oversubscribed (a configuration-time
+ *           event), the bandwidth used by subport member pipes with high demand for
+ *           that traffic class is truncated to a dynamically adjusted value, with no
+ *           impact on low-demand pipes;
+ *     3. Pipe:
+ *         - Typical usage: individual user/subscriber;
+ *         - Traffic shaping using the token bucket algorithm (one bucket per pipe);
+ *     4. Traffic class:
+ *         - Traffic classes of the same pipe handled in strict priority order;
+ *         - Upper limit enforced per traffic class at the pipe level;
+ *         - Lower priority traffic classes able to reuse pipe bandwidth currently
+ *           unused by higher priority traffic classes of the same pipe;
+ *     5. Queue:
+ *         - Typical usage: queue hosting packets from one or multiple connections
+ *           of the same traffic class belonging to the same user;
+ *         - Weighted Round Robin (WRR) is used to service the queues within the same
+ *           pipe traffic class.
+ *
+ ***/
+
+#include <sys/types.h>
+
+#include <rte_mbuf.h>
+#include <rte_meter.h>
+
+/** Random Early Detection (RED) */
+#ifdef RTE_SCHED_RED
+#include "rte_red.h"
+#endif
+
+/** Number of traffic classes per pipe (as well as subport). Cannot be changed. */
+#define RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE    4
+
+/** Number of queues per pipe traffic class. Cannot be changed. */
+#define RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS    4
+
+/** Number of queues per pipe. */
+#define RTE_SCHED_QUEUES_PER_PIPE             \
+	(RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE *     \
+	RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS)
+
+/** Maximum number of pipe profiles that can be defined per port. Compile-time configurable. */
+#ifndef RTE_SCHED_PIPE_PROFILES_PER_PORT
+#define RTE_SCHED_PIPE_PROFILES_PER_PORT      256
+#endif
+
+/** Ethernet framing overhead. Overhead fields per Ethernet frame:
+ * 1. Preamble: 7 bytes;
+ * 2. Start of Frame Delimiter (SFD): 1 byte;
+ * 3. Frame Check Sequence (FCS): 4 bytes;
+ * 4. Inter Frame Gap (IFG): 12 bytes.
+ * The FCS is considered overhead only if not included in the packet length
+ * (field pkt.pkt_len of struct rte_mbuf). */
+#ifndef RTE_SCHED_FRAME_OVERHEAD_DEFAULT
+#define RTE_SCHED_FRAME_OVERHEAD_DEFAULT      24
+#endif
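/* [Editor's illustration, not part of the patch] A quick check of the default
 * overhead arithmetic above: preamble (7) + SFD (1) + FCS (4) + IFG (12) = 24
 * bytes. With the default, a packet whose pkt.pkt_len is 64 bytes consumes
 * 64 + 24 = 88 byte-times of scheduler credits. The function below is a
 * hypothetical helper, not part of the API. */
static inline uint32_t
example_credits_consumed(uint32_t pkt_len)
{
	return pkt_len + RTE_SCHED_FRAME_OVERHEAD_DEFAULT; /* 64 -> 88 */
}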
+
+/** Subport configuration parameters. Each rate is enforced internally as a budget of
+credits_per_period credits spent over a period, both measured in bytes, with one byte meaning
+the time duration associated with the transmission of one byte on the physical medium of the
+output port. The subport or subport traffic class rate (as a percentage of the output port
+rate) is therefore credits_per_period divided by period. One credit represents one byte. */
+struct rte_sched_subport_params {
+	/* Subport token bucket */
+	uint32_t tb_rate;                /**< Subport token bucket rate (measured in bytes per second) */
+	uint32_t tb_size;                /**< Subport token bucket size (measured in credits) */
+
+	/* Subport traffic classes */
+	uint32_t tc_rate[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; /**< Subport traffic class rates (measured in bytes per second) */
+	uint32_t tc_period;              /**< Enforcement period for traffic class rates (measured in milliseconds) */
+#ifdef RTE_SCHED_SUBPORT_TC_OV
+	uint32_t tc_ov_period;           /**< Enforcement period for traffic class oversubscription (measured in milliseconds) */
+#endif
+};
+
+/** Subport statistics */
+struct rte_sched_subport_stats {
+	/* Packets */
+	uint32_t n_pkts_tc[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; /**< Number of packets successfully written to current
+	                                 subport for each traffic class */
+	uint32_t n_pkts_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; /**< Number of packets dropped by the current
+	                                 subport for each traffic class due to subport queues being full or congested */
+
+	/* Bytes */
+	uint32_t n_bytes_tc[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; /**< Number of bytes successfully written to current
+	                                 subport for each traffic class */
+	uint32_t n_bytes_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; /**< Number of bytes dropped by the current
+	                                 subport for each traffic class due to subport queues being full or congested */
+};
+
+/** Pipe configuration parameters. Each rate is enforced internally as a budget of
+credits_per_period credits spent over a period, both measured in bytes, with one byte meaning
+the time duration associated with the transmission of one byte on the physical medium of the
+output port. The pipe or pipe traffic class rate (as a percentage of the output port rate) is
+therefore credits_per_period divided by period. One credit represents one byte.
*/
+struct rte_sched_pipe_params {
+	/* Pipe token bucket */
+	uint32_t tb_rate;                /**< Pipe token bucket rate (measured in bytes per second) */
+	uint32_t tb_size;                /**< Pipe token bucket size (measured in credits) */
+
+	/* Pipe traffic classes */
+	uint32_t tc_rate[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; /**< Pipe traffic class rates (measured in bytes per second) */
+	uint32_t tc_period;              /**< Enforcement period for pipe traffic class rates (measured in milliseconds) */
+#ifdef RTE_SCHED_SUBPORT_TC_OV
+	uint8_t tc_ov_weight[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; /**< Traffic class weights to be used for the
+	                                 current pipe in the event of subport traffic class oversubscription */
+#endif
+
+	/* Pipe queues */
+	uint8_t wrr_weights[RTE_SCHED_QUEUES_PER_PIPE]; /**< WRR weights for the queues of the current pipe */
+};
+
+/** Queue statistics */
+struct rte_sched_queue_stats {
+	/* Packets */
+	uint32_t n_pkts;                 /**< Number of packets successfully written to current queue */
+	uint32_t n_pkts_dropped;         /**< Number of packets dropped due to current queue being full or congested */
+
+	/* Bytes */
+	uint32_t n_bytes;                /**< Number of bytes successfully written to current queue */
+	uint32_t n_bytes_dropped;        /**< Number of bytes dropped due to current queue being full or congested */
+};
+
+/** Port configuration parameters. */
+struct rte_sched_port_params {
+	const char *name;                /**< Literal string to be associated to the current port scheduler instance */
+	int socket;                      /**< CPU socket ID where the memory for the port scheduler should be allocated */
+	uint32_t rate;                   /**< Output port rate (measured in bytes per second) */
+	uint32_t frame_overhead;         /**< Framing overhead per packet (measured in bytes) */
+	uint32_t n_subports_per_port;    /**< Number of subports for the current port scheduler instance */
+	uint32_t n_pipes_per_subport;    /**< Number of pipes for each port scheduler subport */
+	uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; /**< Packet queue size for each traffic class. All queues
+	                                 within the same pipe traffic class have the same size. Queues from
+	                                 different pipes serving the same traffic class have the same size. */
+	struct rte_sched_pipe_params *pipe_profiles; /**< Pipe profile table defined for the current port scheduler
+	                                 instance. Every pipe of the current port scheduler is configured
+	                                 using one of the profiles from this table. */
+	uint32_t n_pipe_profiles;        /**< Number of profiles in the pipe profile table */
+#ifdef RTE_SCHED_RED
+	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][e_RTE_METER_COLORS]; /**< RED parameters */
+#endif
+};
+
+/** Path through the scheduler hierarchy used by the scheduler enqueue operation to
+identify the destination queue for the current packet. Stored in the field pkt.hash.sched
+of struct rte_mbuf of each packet, typically written by the classification stage and read
+by scheduler enqueue. */
+struct rte_sched_port_hierarchy {
+	uint32_t queue:2;                /**< Queue ID (0 .. 3) */
+	uint32_t traffic_class:2;        /**< Traffic class ID (0 .. 3) */
+	uint32_t pipe:20;                /**< Pipe ID */
+	uint32_t subport:6;              /**< Subport ID */
+	uint32_t color:2;                /**< Color */
+};
+
+/*
+ * Configuration
+ *
+ ***/
+
+/**
+ * Hierarchical scheduler port configuration
+ *
+ * @param params
+ *   Port scheduler configuration parameter structure
+ * @return
+ *   Handle to port scheduler instance upon success or NULL otherwise.
+ */
+struct rte_sched_port *
+rte_sched_port_config(struct rte_sched_port_params *params);
+
+/**
+ * Hierarchical scheduler port free
+ *
+ * @param port
+ *   Handle to port scheduler instance
+ */
+void
+rte_sched_port_free(struct rte_sched_port *port);
+
+/**
+ * Hierarchical scheduler subport configuration
+ *
+ * @param port
+ *   Handle to port scheduler instance
+ * @param subport_id
+ *   Subport ID
+ * @param params
+ *   Subport configuration parameters
+ * @return
+ *   0 upon success, error code otherwise
+ */
+int
+rte_sched_subport_config(struct rte_sched_port *port,
+	uint32_t subport_id,
+	struct rte_sched_subport_params *params);
+
+/**
+ * Hierarchical scheduler pipe configuration
+ *
+ * @param port
+ *   Handle to port scheduler instance
+ * @param subport_id
+ *   Subport ID
+ * @param pipe_id
+ *   Pipe ID within subport
+ * @param pipe_profile
+ *   ID of port-level pre-configured pipe profile
+ * @return
+ *   0 upon success, error code otherwise
+ */
+int
+rte_sched_pipe_config(struct rte_sched_port *port,
+	uint32_t subport_id,
+	uint32_t pipe_id,
+	int32_t pipe_profile);
+
+/**
+ * Hierarchical scheduler memory footprint size per port
+ *
+ * @param params
+ *   Port scheduler configuration parameter structure
+ * @return
+ *   Memory footprint size in bytes upon success, 0 otherwise
+ */
+uint32_t
+rte_sched_port_get_memory_footprint(struct rte_sched_port_params *params);
+
+/*
+ * Statistics
+ *
+ ***/
+
+/**
+ * Hierarchical scheduler subport statistics read
+ *
+ * @param port
+ *   Handle to port scheduler instance
+ * @param subport_id
+ *   Subport ID
+ * @param stats
+ *   Pointer to pre-allocated subport statistics structure where the statistics
+ *   counters should be stored
+ * @param tc_ov
+ *   Pointer to pre-allocated 4-entry array where the oversubscription status for
+ *   each of the 4 subport traffic classes should be stored.
+ * @return
+ *   0 upon success, error code otherwise
+ */
+int
+rte_sched_subport_read_stats(struct rte_sched_port *port,
+	uint32_t subport_id,
+	struct rte_sched_subport_stats *stats,
+	uint32_t *tc_ov);
+
+/**
+ * Hierarchical scheduler queue statistics read
+ *
+ * @param port
+ *   Handle to port scheduler instance
+ * @param queue_id
+ *   Queue ID within port scheduler
+ * @param stats
+ *   Pointer to pre-allocated queue statistics structure where the statistics
+ *   counters should be stored
+ * @param qlen
+ *   Pointer to pre-allocated variable where the current queue length should be stored.
+ * @return
+ *   0 upon success, error code otherwise
+ */
+int
+rte_sched_queue_read_stats(struct rte_sched_port *port,
+	uint32_t queue_id,
+	struct rte_sched_queue_stats *stats,
+	uint16_t *qlen);
+
+/*
+ * Run-time
+ *
+ ***/
+
+/**
+ * Scheduler hierarchy path write to packet descriptor. Typically called by the
+ * packet classification stage.
+ *
+ * @param pkt
+ *   Packet descriptor handle
+ * @param subport
+ *   Subport ID
+ * @param pipe
+ *   Pipe ID within subport
+ * @param traffic_class
+ *   Traffic class ID within pipe (0 .. 3)
+ * @param queue
+ *   Queue ID within pipe traffic class (0 .. 3)
+ * @param color
+ *   Packet color (as produced by the metering stage)
+ */
+static inline void
+rte_sched_port_pkt_write(struct rte_mbuf *pkt,
+	uint32_t subport, uint32_t pipe, uint32_t traffic_class, uint32_t queue, enum rte_meter_color color)
+{
+	struct rte_sched_port_hierarchy *sched = (struct rte_sched_port_hierarchy *) &pkt->pkt.hash.sched;
+
+	sched->color = (uint32_t) color;
+	sched->subport = subport;
+	sched->pipe = pipe;
+	sched->traffic_class = traffic_class;
+	sched->queue = queue;
+}
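/* [Editor's illustration, not part of the patch] Typical classification-stage
 * usage of rte_sched_port_pkt_write(). The subport/pipe/traffic class/queue
 * values and the green color are assumed example inputs; a real classifier
 * would derive them from packet fields. */
static inline void
example_classify(struct rte_mbuf *pkt)
{
	rte_sched_port_pkt_write(pkt, 0 /* subport */, 3 /* pipe */,
		1 /* traffic class */, 2 /* queue */, e_RTE_METER_GREEN);
}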
+
+/**
+ * Scheduler hierarchy path read from packet descriptor (struct rte_mbuf). Typically
+ * called as part of the hierarchical scheduler enqueue operation. The subport,
+ * pipe, traffic class and queue parameters need to be pre-allocated by the caller.
+ *
+ * @param pkt
+ *   Packet descriptor handle
+ * @param subport
+ *   Subport ID
+ * @param pipe
+ *   Pipe ID within subport
+ * @param traffic_class
+ *   Traffic class ID within pipe (0 .. 3)
+ * @param queue
+ *   Queue ID within pipe traffic class (0 .. 3)
+ */
+static inline void
+rte_sched_port_pkt_read_tree_path(struct rte_mbuf *pkt, uint32_t *subport, uint32_t *pipe, uint32_t *traffic_class, uint32_t *queue)
+{
+	struct rte_sched_port_hierarchy *sched = (struct rte_sched_port_hierarchy *) &pkt->pkt.hash.sched;
+
+	*subport = sched->subport;
+	*pipe = sched->pipe;
+	*traffic_class = sched->traffic_class;
+	*queue = sched->queue;
+}
+
+/**
+ * Scheduler hierarchy path color read from packet descriptor (struct rte_mbuf).
+ *
+ * @param pkt
+ *   Packet descriptor handle
+ * @return
+ *   Packet color
+ */
+static inline enum rte_meter_color
+rte_sched_port_pkt_read_color(struct rte_mbuf *pkt)
+{
+	struct rte_sched_port_hierarchy *sched = (struct rte_sched_port_hierarchy *) &pkt->pkt.hash.sched;
+
+	return (enum rte_meter_color) sched->color;
+}
+
+/**
+ * Hierarchical scheduler port enqueue. Writes up to n_pkts to the port scheduler and
+ * returns the number of packets actually written. For each packet, the port scheduler
+ * queue to write the packet to is identified by reading the hierarchy path from the
+ * packet descriptor; if the queue is full or congested and the packet is not written
+ * to the queue, then the packet is automatically dropped without any action required
+ * from the caller.
+ *
+ * @param port
+ *   Handle to port scheduler instance
+ * @param pkts
+ *   Array storing the packet descriptor handles
+ * @param n_pkts
+ *   Number of packets to enqueue from the pkts array into the port scheduler
+ * @return
+ *   Number of packets successfully enqueued
+ */
+int
+rte_sched_port_enqueue(struct rte_sched_port *port, struct rte_mbuf **pkts, uint32_t n_pkts);
+
+/**
+ * Hierarchical scheduler port dequeue. Reads up to n_pkts from the port scheduler,
+ * stores them in the pkts array and returns the number of packets actually read.
+ * The pkts array needs to be pre-allocated by the caller with at least n_pkts entries.
+ *
+ * @param port
+ *   Handle to port scheduler instance
+ * @param pkts
+ *   Pre-allocated packet descriptor array where the packets dequeued from the port
+ *   scheduler should be stored
+ * @param n_pkts
+ *   Number of packets to dequeue from the port scheduler
+ * @return
+ *   Number of packets successfully dequeued and placed in the pkts array
+ */
+int
+rte_sched_port_dequeue(struct rte_sched_port *port, struct rte_mbuf **pkts, uint32_t n_pkts);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __INCLUDE_RTE_SCHED_H__ */
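Editor's note: to tie the rte_sched.h API together, here is a minimal usage sketch of the configuration and run-time paths. It is illustrative only: all rates, queue sizes and profile values are assumed example numbers rather than values taken from this patch, the optional traffic class oversubscription fields are omitted for brevity, and error handling is reduced to rte_panic().

#include <rte_mbuf.h>
#include <rte_debug.h>
#include <rte_sched.h>

#define N_PIPES    4096
#define BURST_SIZE 32

static struct rte_sched_port *port;

/* Single example pipe profile: 1 Mbyte/s token bucket, equal traffic class
 * rates, equal WRR weights (all values illustrative) */
static struct rte_sched_pipe_params pipe_profile = {
	.tb_rate = 1000000, .tb_size = 1000000,
	.tc_rate = {1000000, 1000000, 1000000, 1000000},
	.tc_period = 40,
	.wrr_weights = {1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1},
};

static struct rte_sched_subport_params subport_params = {
	.tb_rate = 1250000000, .tb_size = 1000000,
	.tc_rate = {1250000000, 1250000000, 1250000000, 1250000000},
	.tc_period = 10,
};

static void
sched_setup(void)
{
	struct rte_sched_port_params port_params = {
		.name = "port_0",
		.socket = 0,                       /* assumed CPU socket */
		.rate = 1250000000,                /* 10 GbE, in bytes per second */
		.frame_overhead = RTE_SCHED_FRAME_OVERHEAD_DEFAULT,
		.n_subports_per_port = 1,
		.n_pipes_per_subport = N_PIPES,
		.qsize = {64, 64, 64, 64},
		.pipe_profiles = &pipe_profile,
		.n_pipe_profiles = 1,
	};
	uint32_t pipe;

	port = rte_sched_port_config(&port_params);
	if (port == NULL)
		rte_panic("port scheduler configuration failed\n");

	if (rte_sched_subport_config(port, 0, &subport_params) != 0)
		rte_panic("subport configuration failed\n");

	for (pipe = 0; pipe < N_PIPES; pipe ++)
		if (rte_sched_pipe_config(port, 0, pipe, 0) != 0)
			rte_panic("pipe configuration failed\n");
}

/* Run-time: packets are assumed to already carry their hierarchy path,
 * written by the classification stage with rte_sched_port_pkt_write() */
static void
sched_run_once(struct rte_mbuf **rx_pkts, uint32_t n_rx,
	struct rte_mbuf **tx_pkts, uint32_t *n_tx)
{
	rte_sched_port_enqueue(port, rx_pkts, n_rx);
	*n_tx = (uint32_t) rte_sched_port_dequeue(port, tx_pkts, BURST_SIZE);
}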
diff --git a/lib/librte_sched/rte_sched_common.h b/lib/librte_sched/rte_sched_common.h
new file mode 100644
index 0000000000..dc76ad8675
--- /dev/null
+++ b/lib/librte_sched/rte_sched_common.h
@@ -0,0 +1,130 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ *   * Neither the name of Intel Corporation nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef __INCLUDE_RTE_SCHED_COMMON_H__
+#define __INCLUDE_RTE_SCHED_COMMON_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+#define __rte_aligned_16 __attribute__((__aligned__(16)))
+
+static inline uint32_t
+rte_sched_min_val_2_u32(uint32_t x, uint32_t y)
+{
+	return (x < y)? x : y;
+}
+
+#if 0
+static inline uint32_t
+rte_min_pos_4_u16(uint16_t *x)
+{
+	uint32_t pos0, pos1;
+
+	pos0 = (x[0] <= x[1])? 0 : 1;
+	pos1 = (x[2] <= x[3])? 2 : 3;
+
+	return (x[pos0] <= x[pos1])? pos0 : pos1;
+}
+
+#else
+
+/* Branch-free version: the comparisons below are intended to compile to
+ * conditional move (CMOV) instructions */
+static inline uint32_t
+rte_min_pos_4_u16(uint16_t *x)
+{
+	uint32_t pos0 = 0;
+	uint32_t pos1 = 2;
+
+	if (x[1] <= x[0]) pos0 = 1;
+	if (x[3] <= x[2]) pos1 = 3;
+	if (x[pos1] <= x[pos0]) pos0 = pos1;
+
+	return pos0;
+}
+
+#endif
+
+/*
+ * Compute the Greatest Common Divisor (GCD) of two numbers.
+ * This implementation uses Euclid's algorithm:
+ *    gcd(a, 0) = a
+ *    gcd(a, b) = gcd(b, a mod b)
+ *
+ */
+static inline uint32_t
+rte_get_gcd(uint32_t a, uint32_t b)
+{
+	uint32_t c;
+
+	if (a == 0)
+		return b;
+	if (b == 0)
+		return a;
+
+	if (a < b) {
+		c = a;
+		a = b;
+		b = c;
+	}
+
+	while (b != 0) {
+		c = a % b;
+		a = b;
+		b = c;
+	}
+
+	return a;
+}
+
+/*
+ * Compute the Least Common Multiple (LCM) of two numbers (the function keeps
+ * its original rte_get_lcd name). This implementation computes GCD first:
+ *    lcm(a, b) = (a * b) / gcd(a, b)
+ * Note that the product a * b may overflow uint32_t for large inputs.
+ *
+ */
+static inline uint32_t
+rte_get_lcd(uint32_t a, uint32_t b)
+{
+	return (a * b) / rte_get_gcd(a, b);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __INCLUDE_RTE_SCHED_COMMON_H__ */
diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index 8eb45d863d..4b802556f7 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -105,6 +105,12 @@ ifeq ($(CONFIG_RTE_LIBRTE_METER),y)
 LDLIBS += -lrte_meter
 endif
 
+ifeq ($(CONFIG_RTE_LIBRTE_SCHED),y)
+LDLIBS += -lrte_sched
+LDLIBS += -lm
+LDLIBS += -lrt
+endif
+
 LDLIBS += --start-group
 
 ifeq ($(CONFIG_RTE_LIBRTE_ETHER),y)
-- 
2.20.1
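Editor's note: a quick sanity check of the rte_sched_common.h helpers, with assumed example inputs. This is an illustrative sketch, not part of the patch.

#include <stdint.h>
#include <stdio.h>

#include "rte_sched_common.h"

int main(void)
{
	uint16_t tokens[4] = {40, 10, 70, 25};

	printf("gcd(12, 18) = %u\n", rte_get_gcd(12, 18));        /* 6 */
	printf("lcm(4, 6)   = %u\n", rte_get_lcd(4, 6));          /* 24 / 2 = 12 */
	printf("min pos     = %u\n", rte_min_pos_4_u16(tokens));  /* 1 (value 10) */
	return 0;
}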