From ac3fb3019c5205cfce968c884d8cc010d784307b Mon Sep 17 00:00:00 2001 From: Intel Date: Mon, 3 Jun 2013 00:00:00 +0000 Subject: [PATCH] app: rework ring tests Signed-off-by: Intel --- app/test/Makefile | 1 + app/test/autotest_data.py | 12 +- app/test/autotest_test_funcs.py | 2 +- app/test/commands.c | 3 + app/test/test.h | 1 + app/test/test_ring.c | 409 +------------------------------ app/test/test_ring_perf.c | 418 ++++++++++++++++++++++++++++++++ mk/rte.sdktest.mk | 6 +- 8 files changed, 437 insertions(+), 415 deletions(-) create mode 100644 app/test/test_ring_perf.c diff --git a/app/test/Makefile b/app/test/Makefile index 94c86e3114..de866a91c6 100755 --- a/app/test/Makefile +++ b/app/test/Makefile @@ -53,6 +53,7 @@ SRCS-$(CONFIG_RTE_APP_TEST) += test_spinlock.c SRCS-$(CONFIG_RTE_APP_TEST) += test_memory.c SRCS-$(CONFIG_RTE_APP_TEST) += test_memzone.c SRCS-$(CONFIG_RTE_APP_TEST) += test_ring.c +SRCS-$(CONFIG_RTE_APP_TEST) += test_ring_perf.c SRCS-$(CONFIG_RTE_APP_TEST) += test_rwlock.c SRCS-$(CONFIG_RTE_APP_TEST) += test_timer.c SRCS-$(CONFIG_RTE_APP_TEST) += test_mempool.c diff --git a/app/test/autotest_data.py b/app/test/autotest_data.py index a322ce7c36..7cc5a1ac9f 100755 --- a/app/test/autotest_data.py +++ b/app/test/autotest_data.py @@ -236,6 +236,12 @@ parallel_test_group_list = [ "Func" : default_autotest, "Report" : None, }, + { + "Name" : "Ring autotest", + "Command" : "ring_autotest", + "Func" : default_autotest, + "Report" : None, + }, ] }, { @@ -432,9 +438,9 @@ non_parallel_test_group_list = [ "Tests" : [ { - "Name" : "Ring autotest", - "Command" : "ring_autotest", - "Func" : ring_autotest, + "Name" : "Ring performance autotest", + "Command" : "ring_perf_autotest", + "Func" : default_autotest, "Report" : None, }, ] diff --git a/app/test/autotest_test_funcs.py b/app/test/autotest_test_funcs.py index 61de366151..b48c522971 100644 --- a/app/test/autotest_test_funcs.py +++ b/app/test/autotest_test_funcs.py @@ -273,7 +273,7 @@ def timer_autotest(child, 
test_name): def ring_autotest(child, test_name): child.sendline(test_name) index = child.expect(["Test OK", "Test Failed", - pexpect.TIMEOUT], timeout = 1500) + pexpect.TIMEOUT], timeout = 15) if index == 1: return -1, "Fail" elif index == 2: diff --git a/app/test/commands.c b/app/test/commands.c index 096b97862f..21a504e369 100755 --- a/app/test/commands.c +++ b/app/test/commands.c @@ -157,6 +157,8 @@ static void cmd_autotest_parsed(void *parsed_result, ret |= test_cycles(); if (all || !strcmp(res->autotest, "ring_autotest")) ret |= test_ring(); + if (all || !strcmp(res->autotest, "ring_perf_autotest")) + ret |= test_ring_perf(); if (all || !strcmp(res->autotest, "timer_autotest")) ret |= test_timer(); if (all || !strcmp(res->autotest, "mempool_autotest")) @@ -211,6 +213,7 @@ cmdline_parse_token_string_t cmd_autotest_autotest = "version_autotest#eal_fs_autotest#" "cmdline_autotest#func_reentrancy_autotest#" "mempool_perf_autotest#hash_perf_autotest#" + "memcpy_perf_autotest#ring_perf_autotest#" "red_autotest#meter_autotest#sched_autotest#" "memcpy_perf_autotest#kni_autotest#" "pm_autotest#acl_autotest#power_autotest#" diff --git a/app/test/test.h b/app/test/test.h index b991ce541d..265cf6ec7b 100755 --- a/app/test/test.h +++ b/app/test/test.h @@ -60,6 +60,7 @@ int test_cycles(void); int test_logs(void); int test_memzone(void); int test_ring(void); +int test_ring_perf(void); int test_mempool(void); int test_mempool_perf(void); int test_mbuf(void); diff --git a/app/test/test_ring.c b/app/test/test_ring.c index de0489fcd5..34dc20b747 100644 --- a/app/test/test_ring.c +++ b/app/test/test_ring.c @@ -99,24 +99,7 @@ * * #. Performance tests. 
* - * This test is done on the following configurations: - * - * - One core enqueuing, one core dequeuing - * - One core enqueuing, other cores dequeuing - * - One core dequeuing, other cores enqueuing - * - Half of the cores enqueuing, the other half dequeuing - * - * When only one core enqueues/dequeues, the test is done with the - * SP/SC functions in addition to the MP/MC functions. - * - * The test is done with different bulk size. - * - * On each core, the test enqueues or dequeues objects during - * TIME_S seconds. The number of successes and failures are stored on - * each core, then summed and displayed. - * - * The test checks that the number of enqueues is equal to the - * number of dequeues. + * Tests done in test_ring_perf.c */ #define RING_SIZE 4096 @@ -128,182 +111,6 @@ static rte_atomic32_t synchro; static struct rte_ring *r; -struct test_stats { - unsigned enq_success ; - unsigned enq_quota; - unsigned enq_fail; - - unsigned deq_success; - unsigned deq_fail; -} __rte_cache_aligned; - -static struct test_stats test_stats[RTE_MAX_LCORE]; - -static int -ring_enqueue_test(int (que_func)(struct rte_ring*, void * const *, unsigned), - void* arg, unsigned bulk_or_burst) -{ - unsigned success = 0; - unsigned quota = 0; - unsigned fail = 0; - unsigned i; - unsigned long dummy_obj; - void *obj_table[MAX_BULK]; - int ret; - unsigned lcore_id = rte_lcore_id(); - unsigned count = *((unsigned*)arg); - uint64_t start_cycles, end_cycles; - uint64_t time_diff = 0, hz = rte_get_hpet_hz(); - - /* init dummy object table */ - for (i = 0; i< MAX_BULK; i++) { - dummy_obj = lcore_id + 0x1000 + i; - obj_table[i] = (void *)dummy_obj; - } - - /* wait synchro for slaves */ - if (lcore_id != rte_get_master_lcore()) - while (rte_atomic32_read(&synchro) == 0); - - start_cycles = rte_get_hpet_cycles(); - - /* enqueue as many object as possible */ - while (time_diff/hz < TIME_S) { - for (i = 0; likely(i < N); i++) { - ret = que_func(r, obj_table, count); - /* - * bulk_or_burst - 
* 1: for bulk operation - * 0: for burst operation - */ - if (bulk_or_burst) { - /* The *count* objects enqueued, unless fail */ - if (ret == 0) - success += count; - else if (ret == -EDQUOT) - quota += count; - else - fail++; - } else { - /* The actual objects enqueued */ - if (ret != 0) - success += (ret & RTE_RING_SZ_MASK); - else - fail++; - } - } - end_cycles = rte_get_hpet_cycles(); - time_diff = end_cycles - start_cycles; - } - - /* write statistics in a shared structure */ - test_stats[lcore_id].enq_success = success; - test_stats[lcore_id].enq_quota = quota; - test_stats[lcore_id].enq_fail = fail; - - return 0; -} - -static int -ring_dequeue_test(int (que_func)(struct rte_ring*, void **, unsigned), - void* arg, unsigned bulk_or_burst) -{ - unsigned success = 0; - unsigned fail = 0; - unsigned i; - void *obj_table[MAX_BULK]; - int ret; - unsigned lcore_id = rte_lcore_id(); - unsigned count = *((unsigned*)arg); - uint64_t start_cycles, end_cycles; - uint64_t time_diff = 0, hz = rte_get_hpet_hz(); - - /* wait synchro for slaves */ - if (lcore_id != rte_get_master_lcore()) - while (rte_atomic32_read(&synchro) == 0); - - start_cycles = rte_get_hpet_cycles(); - - /* dequeue as many object as possible */ - while (time_diff/hz < TIME_S) { - for (i = 0; likely(i < N); i++) { - ret = que_func(r, obj_table, count); - /* - * bulk_or_burst - * 1: for bulk operation - * 0: for burst operation - */ - if (bulk_or_burst) { - if (ret == 0) - success += count; - else - fail++; - } else { - if (ret != 0) - success += ret; - else - fail++; - } - } - end_cycles = rte_get_hpet_cycles(); - time_diff = end_cycles - start_cycles; - } - - /* write statistics in a shared structure */ - test_stats[lcore_id].deq_success = success; - test_stats[lcore_id].deq_fail = fail; - - return 0; -} - -static int -test_ring_per_core_sp_enqueue(void *arg) -{ - return ring_enqueue_test(&rte_ring_sp_enqueue_bulk, arg, 1); -} - -static int -test_ring_per_core_mp_enqueue(void *arg) -{ - return 
ring_enqueue_test(&rte_ring_mp_enqueue_bulk, arg, 1); -} - -static int -test_ring_per_core_mc_dequeue(void *arg) -{ - return ring_dequeue_test(&rte_ring_mc_dequeue_bulk, arg, 1); -} - -static int -test_ring_per_core_sc_dequeue(void *arg) -{ - return ring_dequeue_test(&rte_ring_sc_dequeue_bulk, arg, 1); -} - -static int -test_ring_per_core_sp_enqueue_burst(void *arg) -{ - return ring_enqueue_test(&rte_ring_sp_enqueue_burst, arg, 0); -} - -static int -test_ring_per_core_mp_enqueue_burst(void *arg) -{ - return ring_enqueue_test(&rte_ring_mp_enqueue_burst, arg, 0); -} - -static int -test_ring_per_core_mc_dequeue_burst(void *arg) -{ - return ring_dequeue_test(&rte_ring_mc_dequeue_burst, arg, 0); -} - -static int -test_ring_per_core_sc_dequeue_burst(void *arg) -{ - return ring_dequeue_test(&rte_ring_sc_dequeue_burst, arg, 0); -} - #define TEST_RING_VERIFY(exp) \ if (!(exp)) { \ printf("error at %s:%d\tcondition " #exp " failed\n", \ @@ -314,166 +121,6 @@ test_ring_per_core_sc_dequeue_burst(void *arg) #define TEST_RING_FULL_EMTPY_ITER 8 - -static int -launch_cores(unsigned enq_core_count, unsigned deq_core_count, - unsigned n_enq_bulk, unsigned n_deq_bulk, - int sp, int sc, int bulk_not_burst) -{ - void *obj; - unsigned lcore_id; - unsigned rate, deq_remain = 0; - unsigned enq_total, deq_total; - struct test_stats sum; - int (*enq_f)(void *); - int (*deq_f)(void *); - unsigned cores = enq_core_count + deq_core_count; - int ret; - - rte_atomic32_set(&synchro, 0); - - printf("ring_autotest e/d_core=%u,%u e/d_bulk=%u,%u ", - enq_core_count, deq_core_count, n_enq_bulk, n_deq_bulk); - printf("sp=%d sc=%d ", sp, sc); - - if (bulk_not_burst) { - /* set enqueue function to be used */ - if (sp) - enq_f = test_ring_per_core_sp_enqueue; - else - enq_f = test_ring_per_core_mp_enqueue; - - /* set dequeue function to be used */ - if (sc) - deq_f = test_ring_per_core_sc_dequeue; - else - deq_f = test_ring_per_core_mc_dequeue; - - } else { - /* set enqueue function to be used */ - if 
(sp) - enq_f = test_ring_per_core_sp_enqueue_burst; - else - enq_f = test_ring_per_core_mp_enqueue_burst; - - /* set dequeue function to be used */ - if (sc) - deq_f = test_ring_per_core_sc_dequeue_burst; - else - deq_f = test_ring_per_core_mc_dequeue_burst; - } - - RTE_LCORE_FOREACH_SLAVE(lcore_id) { - if (enq_core_count != 0) { - enq_core_count--; - rte_eal_remote_launch(enq_f, &n_enq_bulk, lcore_id); - } - if (deq_core_count != 1) { - deq_core_count--; - rte_eal_remote_launch(deq_f, &n_deq_bulk, lcore_id); - } - } - - memset(test_stats, 0, sizeof(test_stats)); - - /* start synchro and launch test on master */ - rte_atomic32_set(&synchro, 1); - ret = deq_f(&n_deq_bulk); - - /* wait all cores */ - RTE_LCORE_FOREACH_SLAVE(lcore_id) { - if (cores == 1) - break; - cores--; - if (rte_eal_wait_lcore(lcore_id) < 0) - ret = -1; - } - - memset(&sum, 0, sizeof(sum)); - for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { - sum.enq_success += test_stats[lcore_id].enq_success; - sum.enq_quota += test_stats[lcore_id].enq_quota; - sum.enq_fail += test_stats[lcore_id].enq_fail; - sum.deq_success += test_stats[lcore_id].deq_success; - sum.deq_fail += test_stats[lcore_id].deq_fail; - } - - /* empty the ring */ - while (rte_ring_sc_dequeue(r, &obj) == 0) - deq_remain += 1; - - if (ret < 0) { - printf("per-lcore test returned -1\n"); - return -1; - } - - enq_total = sum.enq_success + sum.enq_quota; - deq_total = sum.deq_success + deq_remain; - - rate = deq_total/TIME_S; - - printf("rate_persec=%u\n", rate); - - if (enq_total != deq_total) { - printf("invalid enq/deq_success counter: %u %u\n", - enq_total, deq_total); - return -1; - } - - return 0; -} - -static int -do_one_ring_test2(unsigned enq_core_count, unsigned deq_core_count, - unsigned n_enq_bulk, unsigned n_deq_bulk, unsigned bulk_or_burst) -{ - int sp, sc; - int do_sp, do_sc; - int ret; - - do_sp = (enq_core_count == 1) ? 1 : 0; - do_sc = (deq_core_count == 1) ? 
1 : 0; - - for (sp = 0; sp <= do_sp; sp ++) { - for (sc = 0; sc <= do_sc; sc ++) { - ret = launch_cores(enq_core_count, deq_core_count, - n_enq_bulk, n_deq_bulk, sp, sc, bulk_or_burst); - if (ret < 0) - return -1; - } - } - return 0; -} - -static int -do_one_ring_test(unsigned enq_core_count, unsigned deq_core_count, - unsigned bulk_or_burst) -{ - unsigned bulk_enqueue_tab[] = { 1, 2, 4, 32, 0 }; - unsigned bulk_dequeue_tab[] = { 1, 2, 4, 32, 0 }; - unsigned *bulk_enqueue_ptr; - unsigned *bulk_dequeue_ptr; - int ret; - - for (bulk_enqueue_ptr = bulk_enqueue_tab; - *bulk_enqueue_ptr; - bulk_enqueue_ptr++) { - - for (bulk_dequeue_ptr = bulk_dequeue_tab; - *bulk_dequeue_ptr; - bulk_dequeue_ptr++) { - - ret = do_one_ring_test2(enq_core_count, deq_core_count, - *bulk_enqueue_ptr, - *bulk_dequeue_ptr, - bulk_or_burst); - if (ret < 0) - return -1; - } - } - return 0; -} - static int check_live_watermark_change(__attribute__((unused)) void *dummy) { @@ -1681,8 +1328,6 @@ fail_test: int test_ring(void) { - unsigned enq_core_count, deq_core_count; - /* some more basic operations */ if (test_ring_basic_ex() < 0) return -1; @@ -1737,58 +1382,6 @@ test_ring(void) else printf ( "Test detected NULL ring lookup \n"); - printf("start performance tests \n"); - - /* one lcore for enqueue, one for dequeue */ - enq_core_count = 1; - deq_core_count = 1; - if (do_one_ring_test(enq_core_count, deq_core_count, 1) < 0) - return -1; - - /* max cores for enqueue, one for dequeue */ - enq_core_count = rte_lcore_count() - 1; - deq_core_count = 1; - if (do_one_ring_test(enq_core_count, deq_core_count, 1) < 0) - return -1; - - /* max cores for dequeue, one for enqueue */ - enq_core_count = 1; - deq_core_count = rte_lcore_count() - 1; - if (do_one_ring_test(enq_core_count, deq_core_count, 1) < 0) - return -1; - - /* half for enqueue and half for dequeue */ - enq_core_count = rte_lcore_count() / 2; - deq_core_count = rte_lcore_count() / 2; - if (do_one_ring_test(enq_core_count, deq_core_count, 1) < 
0) - return -1; - - printf("start performance tests - burst operations \n"); - - /* one lcore for enqueue, one for dequeue */ - enq_core_count = 1; - deq_core_count = 1; - if (do_one_ring_test(enq_core_count, deq_core_count, 0) < 0) - return -1; - - /* max cores for enqueue, one for dequeue */ - enq_core_count = rte_lcore_count() - 1; - deq_core_count = 1; - if (do_one_ring_test(enq_core_count, deq_core_count, 0) < 0) - return -1; - - /* max cores for dequeue, one for enqueue */ - enq_core_count = 1; - deq_core_count = rte_lcore_count() - 1; - if (do_one_ring_test(enq_core_count, deq_core_count, 0) < 0) - return -1; - - /* half for enqueue and half for dequeue */ - enq_core_count = rte_lcore_count() / 2; - deq_core_count = rte_lcore_count() / 2; - if (do_one_ring_test(enq_core_count, deq_core_count, 0) < 0) - return -1; - /* test of creating ring with wrong size */ if (test_ring_creation_with_wrong_size() < 0) return -1; diff --git a/app/test/test_ring_perf.c b/app/test/test_ring_perf.c new file mode 100644 index 0000000000..25b249fe71 --- /dev/null +++ b/app/test/test_ring_perf.c @@ -0,0 +1,418 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2013 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + + +#include +#include +#include +#include +#include + +#include + +#include "test.h" + +/* + * Ring + * ==== + * + * Measures performance of various operations using rdtsc + * * Empty ring dequeue + * * Enqueue/dequeue of bursts in 1 threads + * * Enqueue/dequeue of bursts in 2 threads + */ + +#define RING_NAME "RING_PERF" +#define RING_SIZE 4096 +#define MAX_BURST 32 + +/* + * the sizes to enqueue and dequeue in testing + * (marked volatile so they won't be seen as compile-time constants) + */ +static const volatile unsigned bulk_sizes[] = { 8, 32 }; + +/* The ring structure used for tests */ +static struct rte_ring *r; + +struct lcore_pair { + unsigned c1, c2; +}; + +static volatile unsigned lcore_count = 0; + +/**** Functions to analyse our core mask to get cores for different tests ***/ + +static int +get_two_hyperthreads(struct lcore_pair *lcp) +{ + unsigned id1, id2; + unsigned c1, c2, s1, s2; + RTE_LCORE_FOREACH(id1) { + /* inner loop just re-reads all id's. 
We could skip the first few + * elements, but since number of cores is small there is little point + */ + RTE_LCORE_FOREACH(id2) { + if (id1 == id2) + continue; + c1 = lcore_config[id1].core_id; + c2 = lcore_config[id2].core_id; + s1 = lcore_config[id1].socket_id; + s2 = lcore_config[id2].socket_id; + if ((c1 == c2) && (s1 == s2)){ + lcp->c1 = id1; + lcp->c2 = id2; + return 0; + } + } + } + return 1; +} + +static int +get_two_cores(struct lcore_pair *lcp) +{ + unsigned id1, id2; + unsigned c1, c2, s1, s2; + RTE_LCORE_FOREACH(id1) { + RTE_LCORE_FOREACH(id2) { + if (id1 == id2) + continue; + c1 = lcore_config[id1].core_id; + c2 = lcore_config[id2].core_id; + s1 = lcore_config[id1].socket_id; + s2 = lcore_config[id2].socket_id; + if ((c1 != c2) && (s1 == s2)){ + lcp->c1 = id1; + lcp->c2 = id2; + return 0; + } + } + } + return 1; +} + +static int +get_two_sockets(struct lcore_pair *lcp) +{ + unsigned id1, id2; + unsigned s1, s2; + RTE_LCORE_FOREACH(id1) { + RTE_LCORE_FOREACH(id2) { + if (id1 == id2) + continue; + s1 = lcore_config[id1].socket_id; + s2 = lcore_config[id2].socket_id; + if (s1 != s2){ + lcp->c1 = id1; + lcp->c2 = id2; + return 0; + } + } + } + return 1; +} + +/* Get cycle counts for dequeuing from an empty ring. 
Should be 2 or 3 cycles */ +static void +test_empty_dequeue(void) +{ + const unsigned iter_shift = 26; + const unsigned iterations = 1<size; + unsigned i; + void *burst[MAX_BURST] = {0}; + + if ( __sync_add_and_fetch(&lcore_count, 1) != 2 ) + while(lcore_count != 2) + rte_pause(); + + const uint64_t sp_start = rte_rdtsc(); + for (i = 0; i < iterations; i++) + while (rte_ring_sp_enqueue_bulk(r, burst, size) != 0) + rte_pause(); + const uint64_t sp_end = rte_rdtsc(); + + const uint64_t mp_start = rte_rdtsc(); + for (i = 0; i < iterations; i++) + while (rte_ring_mp_enqueue_bulk(r, burst, size) != 0) + rte_pause(); + const uint64_t mp_end = rte_rdtsc(); + + params->spsc = ((double)(sp_end - sp_start))/(iterations*size); + params->mpmc = ((double)(mp_end - mp_start))/(iterations*size); + return 0; +} + +/* + * Function that uses rdtsc to measure timing for ring dequeue. Needs pair + * thread running enqueue_bulk function + */ +static int +dequeue_bulk(void *p) +{ + const unsigned iter_shift = 23; + const unsigned iterations = 1<size; + unsigned i; + void *burst[MAX_BURST] = {0}; + + if ( __sync_add_and_fetch(&lcore_count, 1) != 2 ) + while(lcore_count != 2) + rte_pause(); + + const uint64_t sc_start = rte_rdtsc(); + for (i = 0; i < iterations; i++) + while (rte_ring_sc_dequeue_bulk(r, burst, size) != 0) + rte_pause(); + const uint64_t sc_end = rte_rdtsc(); + + const uint64_t mc_start = rte_rdtsc(); + for (i = 0; i < iterations; i++) + while (rte_ring_mc_dequeue_bulk(r, burst, size) != 0) + rte_pause(); + const uint64_t mc_end = rte_rdtsc(); + + params->spsc = ((double)(sc_end - sc_start))/(iterations*size); + params->mpmc = ((double)(mc_end - mc_start))/(iterations*size); + return 0; +} + +/* + * Function that calls the enqueue and dequeue bulk functions on pairs of cores. + * used to measure ring perf between hyperthreads, cores and sockets. 
+ */ +static void +run_on_core_pair(struct lcore_pair *cores, + lcore_function_t f1, lcore_function_t f2) +{ + struct thread_params param1 = {.size = 0}, param2 = {.size = 0}; + unsigned i; + for (i = 0; i < sizeof(bulk_sizes)/sizeof(bulk_sizes[0]); i++) { + lcore_count = 0; + param1.size = param2.size = bulk_sizes[i]; + if (cores->c1 == rte_get_master_lcore()) { + rte_eal_remote_launch(f2, &param2, cores->c2); + f1(&param1); + rte_eal_wait_lcore(cores->c2); + } else { + rte_eal_remote_launch(f1, &param1, cores->c1); + rte_eal_remote_launch(f2, &param2, cores->c2); + rte_eal_wait_lcore(cores->c1); + rte_eal_wait_lcore(cores->c2); + } + printf("SP/SC bulk enq/dequeue (size: %u): %.2F\n", bulk_sizes[i], + param1.spsc + param2.spsc); + printf("MP/MC bulk enq/dequeue (size: %u): %.2F\n", bulk_sizes[i], + param1.mpmc + param2.mpmc); + } +} + +/* + * Test function that determines how long an enqueue + dequeue of a single item + * takes on a single lcore. Result is for comparison with the bulk enq+deq. + */ +static void +test_single_enqueue_dequeue(void) +{ + const unsigned iter_shift = 24; + const unsigned iterations = 1<> iter_shift); + printf("MP/MC single enq/dequeue: %"PRIu64"\n", + (mc_end-mc_start) >> iter_shift); +} + +/* + * Test that does both enqueue and dequeue on a core using the burst() API calls + * instead of the bulk() calls used in other tests. Results should be the same + * as for the bulk function called on a single lcore. 
+ */ +static void +test_burst_enqueue_dequeue(void) +{ + const unsigned iter_shift = 23; + const unsigned iterations = 1<> iter_shift) / bulk_sizes[sz]; + uint64_t sc_avg = ((sc_end-sc_start) >> iter_shift) / bulk_sizes[sz]; + + printf("SP/SC burst enq/dequeue (size: %u): %"PRIu64"\n", bulk_sizes[sz], + sc_avg); + printf("MP/MC burst enq/dequeue (size: %u): %"PRIu64"\n", bulk_sizes[sz], + mc_avg); + } +} + +/* Times enqueue and dequeue on a single lcore */ +static void +test_bulk_enqueue_dequeue(void) +{ + const unsigned iter_shift = 23; + const unsigned iterations = 1<