/* SPDX-License-Identifier: BSD-3-Clause
* Copyright(c) 2010-2014 Intel Corporation
+ * Copyright(c) 2019 Arm Limited
*/
#include <rte_cycles.h>
#include <rte_launch.h>
#include <rte_pause.h>
+#include <string.h>
#include "test.h"
* * Empty ring dequeue
* * Enqueue/dequeue of bursts in 1 threads
* * Enqueue/dequeue of bursts in 2 threads
+ * * Enqueue/dequeue of bursts in all available threads
*/
#define RING_NAME "RING_PERF"
RTE_LCORE_FOREACH(id2) {
if (id1 == id2)
continue;
- c1 = lcore_config[id1].core_id;
- c2 = lcore_config[id2].core_id;
- s1 = lcore_config[id1].socket_id;
- s2 = lcore_config[id2].socket_id;
+
+ c1 = rte_lcore_to_cpu_id(id1);
+ c2 = rte_lcore_to_cpu_id(id2);
+ s1 = rte_lcore_to_socket_id(id1);
+ s2 = rte_lcore_to_socket_id(id2);
if ((c1 == c2) && (s1 == s2)){
lcp->c1 = id1;
lcp->c2 = id2;
RTE_LCORE_FOREACH(id2) {
if (id1 == id2)
continue;
- c1 = lcore_config[id1].core_id;
- c2 = lcore_config[id2].core_id;
- s1 = lcore_config[id1].socket_id;
- s2 = lcore_config[id2].socket_id;
+
+ c1 = rte_lcore_to_cpu_id(id1);
+ c2 = rte_lcore_to_cpu_id(id2);
+ s1 = rte_lcore_to_socket_id(id1);
+ s2 = rte_lcore_to_socket_id(id2);
if ((c1 != c2) && (s1 == s2)){
lcp->c1 = id1;
lcp->c2 = id2;
RTE_LCORE_FOREACH(id2) {
if (id1 == id2)
continue;
- s1 = lcore_config[id1].socket_id;
- s2 = lcore_config[id2].socket_id;
+ s1 = rte_lcore_to_socket_id(id1);
+ s2 = rte_lcore_to_socket_id(id2);
if (s1 != s2){
lcp->c1 = id1;
lcp->c2 = id2;
unsigned i;
void *burst[MAX_BURST] = {0};
- if ( __sync_add_and_fetch(&lcore_count, 1) != 2 )
+#ifdef RTE_USE_C11_MEM_MODEL
+ if (__atomic_add_fetch(&lcore_count, 1, __ATOMIC_RELAXED) != 2)
+#else
+ if (__sync_add_and_fetch(&lcore_count, 1) != 2)
+#endif
while(lcore_count != 2)
rte_pause();
unsigned i;
void *burst[MAX_BURST] = {0};
- if ( __sync_add_and_fetch(&lcore_count, 1) != 2 )
+#ifdef RTE_USE_C11_MEM_MODEL
+ if (__atomic_add_fetch(&lcore_count, 1, __ATOMIC_RELAXED) != 2)
+#else
+ if (__sync_add_and_fetch(&lcore_count, 1) != 2)
+#endif
while(lcore_count != 2)
rte_pause();
}
}
+static rte_atomic32_t synchro;
+static uint64_t queue_count[RTE_MAX_LCORE];
+
+#define TIME_MS 100
+
+/*
+ * Per-lcore worker: hammer the shared ring with bulk enqueue/dequeue pairs
+ * for TIME_MS milliseconds and record the number of completed iterations in
+ * queue_count[lcore].
+ *
+ * p: struct thread_params * carrying the ring (r) and burst size (size).
+ * Always returns 0; the measurement is published via queue_count[].
+ */
+static int
+load_loop_fn(void *p)
+{
+ uint64_t time_diff = 0;
+ uint64_t begin = 0;
+ uint64_t hz = rte_get_timer_hz();
+ uint64_t lcount = 0;
+ const unsigned int lcore = rte_lcore_id();
+ struct thread_params *params = p;
+ void *burst[MAX_BURST] = {0};
+
+ /* wait synchro for slaves */
+ if (lcore != rte_get_master_lcore())
+ while (rte_atomic32_read(&synchro) == 0)
+ rte_pause();
+
+ /* run enqueue/dequeue pairs until TIME_MS worth of timer cycles pass */
+ begin = rte_get_timer_cycles();
+ while (time_diff < hz * TIME_MS / 1000) {
+ /* return values deliberately ignored: this is a throughput loop,
+ * and each dequeue drains exactly what the enqueue inserted */
+ rte_ring_mp_enqueue_bulk(params->r, burst, params->size, NULL);
+ rte_ring_mc_dequeue_bulk(params->r, burst, params->size, NULL);
+ lcount++;
+ time_diff = rte_get_timer_cycles() - begin;
+ }
+ queue_count[lcore] = lcount;
+ return 0;
+}
+
+/*
+ * Launch load_loop_fn on every available lcore (slaves via
+ * rte_eal_mp_remote_launch, master directly) for each configured bulk
+ * size, then print per-core and total iteration counts.
+ *
+ * r: ring shared by all workers.
+ * Returns 0 on success, -1 if launching the slave lcores fails.
+ *
+ * Fix: "&param" had been mojibake-corrupted to "¶m" (the HTML entity
+ * &para; collapsed to the pilcrow character), which does not compile.
+ */
+static int
+run_on_all_cores(struct rte_ring *r)
+{
+ uint64_t total = 0;
+ struct thread_params param;
+ unsigned int i, c;
+
+ memset(&param, 0, sizeof(struct thread_params));
+ for (i = 0; i < RTE_DIM(bulk_sizes); i++) {
+ printf("\nBulk enq/dequeue count on size %u\n", bulk_sizes[i]);
+ param.size = bulk_sizes[i];
+ param.r = r;
+
+ /* clear synchro and start slaves */
+ rte_atomic32_set(&synchro, 0);
+ if (rte_eal_mp_remote_launch(load_loop_fn, &param,
+ SKIP_MASTER) < 0)
+ return -1;
+
+ /* start synchro and launch test on master */
+ rte_atomic32_set(&synchro, 1);
+ load_loop_fn(&param);
+
+ rte_eal_mp_wait_lcore();
+
+ RTE_LCORE_FOREACH(c) {
+ printf("Core [%u] count = %"PRIu64"\n",
+ c, queue_count[c]);
+ total += queue_count[c];
+ }
+
+ printf("Total count (size: %u): %"PRIu64"\n",
+ bulk_sizes[i], total);
+ }
+
+ return 0;
+}
+
/*
* Test function that determines how long an enqueue + dequeue of a single item
* takes on a single lcore. Result is for comparison with the bulk enq+deq.
printf("\n### Testing using two NUMA nodes ###\n");
run_on_core_pair(&cores, r, enqueue_bulk, dequeue_bulk);
}
+
+ printf("\n### Testing using all slave nodes ###\n");
+ run_on_all_cores(r);
+
rte_ring_free(r);
return 0;
}