1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2018 Intel Corporation
8 #include <rte_cycles.h>
10 #include <rte_hash_crc.h>
11 #include <rte_jhash.h>
12 #include <rte_launch.h>
13 #include <rte_malloc.h>
14 #include <rte_random.h>
15 #include <rte_spinlock.h>
/* Value written over a key slot that a worker did not finish processing;
 * the verification loops skip slots holding this value.
 */
#define RTE_RWTEST_FAIL 0

/* Number of hash table entries and length of the keys[] / found[] arrays. */
#define TOTAL_ENTRY (5*1024*1024)
/*
 * Default number of keys to insert: 4.5 Mi keys (4718592).
 * Written as an integer expression so that the loop bounds, offsets and
 * per-core counts derived from it use integer rather than floating-point
 * arithmetic.
 */
#define TOTAL_INSERT (9*1024*1024/2)
/* Alternative insert count, equal to TOTAL_ENTRY (presumably used for the
 * extendable-table case -- TODO confirm against the selecting condition).
 */
#define TOTAL_INSERT_EXT (5*1024*1024)
/*
 * Reader/writer core counts used by the perf test: round n launches
 * core_cnt[n] readers and core_cnt[n] writers.
 */
26 unsigned int core_cnt[NUM_TEST] = {2, 4, 8};
/*
 * Worker lcore ids, filled in by test_hash_rw_perf_main() and
 * test_hash_rw_func_main() before any worker is launched.
 */
28 unsigned int worker_core_ids[RTE_MAX_LCORE];
/*
 * Cycles-per-operation results. The enclosing declaration is not visible
 * here; presumably these are fields of struct perf -- TODO confirm.
 */
31 uint32_t single_write;
32 uint32_t read_only[NUM_TEST];
33 uint32_t write_only[NUM_TEST];
34 uint32_t read_write_r[NUM_TEST];
35 uint32_t read_write_w[NUM_TEST];
/* One result set for runs with hardware transactional memory, one without. */
38 static struct perf htm_results, non_htm_results;
/*
 * Accessed as tbl_rw_test_param.rounded_tot_insert: the number of key slots
 * the verification loops walk after a run.
 */
44 uint32_t rounded_tot_insert;
/* Totals accumulated atomically by test_hash_readwrite_worker(). */
48 static uint64_t gcycles;
49 static uint64_t ginsertions;
/* Cycle totals accumulated atomically by test_rw_reader() and test_rw_writer(). */
51 static uint64_t gread_cycles;
52 static uint64_t gwrite_cycles;
/* Operation counts that the cycle totals are divided by. */
54 static uint64_t greads;
55 static uint64_t gwrites;
/*
 * Functional-test worker, run on each worker lcore.
 *
 * Finds its own position in worker_core_ids[], derives from it a range of
 * tbl_rw_test_param.num_insert keys, and for each key in that range does a
 * lookup and an add, followed by a lookup / delete / re-add / lookup
 * sequence on a randomly chosen index. The elapsed TSC cycles and the number
 * of keys walked are added to gcycles and ginsertions, and every key that
 * was not reached is overwritten with RTE_RWTEST_FAIL.
 *
 * The statements guarded by the if conditions below are not visible here;
 * presumably each one leaves the loop early -- TODO confirm.
 *
 * arg is unused.
 */
58 test_hash_readwrite_worker(__rte_unused void *arg)
61 uint32_t lcore_id = rte_lcore_id();
62 uint64_t begin, cycles;
/*
 * ret[k] holds the value returned by rte_hash_add_key() for the k-th key of
 * this worker.
 * NOTE(review): no NULL check of the rte_malloc() result is visible before
 * ret[] is written -- confirm.
 */
65 ret = rte_malloc(NULL, sizeof(int) *
66 tbl_rw_test_param.num_insert, 0);
/* Locate this lcore in worker_core_ids[]; i becomes the worker index. */
67 for (i = 0; i < rte_lcore_count(); i++) {
68 if (worker_core_ids[i] == lcore_id)
/* Each worker owns num_insert consecutive keys starting at offset. */
71 offset = tbl_rw_test_param.num_insert * i;
73 printf("Core #%d inserting and reading %d: %'"PRId64" - %'"PRId64"\n",
74 lcore_id, tbl_rw_test_param.num_insert,
75 offset, offset + tbl_rw_test_param.num_insert - 1);
77 begin = rte_rdtsc_precise();
79 for (i = offset; i < offset + tbl_rw_test_param.num_insert; i++) {
81 if (rte_hash_lookup(tbl_rw_test_param.h,
82 tbl_rw_test_param.keys + i) > 0)
85 ret[i - offset] = rte_hash_add_key(tbl_rw_test_param.h,
86 tbl_rw_test_param.keys + i);
87 if (ret[i - offset] < 0)
90 /* lookup a random key */
/*
 * rand lies in [0, i - offset].
 * NOTE(review): ret[] is indexed relative to offset, while keys + rand is
 * not offset-adjusted; for a worker whose offset is non-zero these refer to
 * different keys -- confirm this is intended.
 */
91 uint32_t rand = rte_rand() % (i + 1 - offset);
93 if (rte_hash_lookup(tbl_rw_test_param.h,
94 tbl_rw_test_param.keys + rand) != ret[rand])
98 if (rte_hash_del_key(tbl_rw_test_param.h,
99 tbl_rw_test_param.keys + rand) != ret[rand])
102 ret[rand] = rte_hash_add_key(tbl_rw_test_param.h,
103 tbl_rw_test_param.keys + rand);
107 if (rte_hash_lookup(tbl_rw_test_param.h,
108 tbl_rw_test_param.keys + rand) != ret[rand])
112 cycles = rte_rdtsc_precise() - begin;
/* i - offset is the number of keys walked before the loop ended. */
113 __atomic_fetch_add(&gcycles, cycles, __ATOMIC_RELAXED);
114 __atomic_fetch_add(&ginsertions, i - offset, __ATOMIC_RELAXED);
/* i still holds the stopping point, so only unreached keys are marked. */
116 for (; i < offset + tbl_rw_test_param.num_insert; i++)
117 tbl_rw_test_param.keys[i] = RTE_RWTEST_FAIL;
/*
 * Create the hash table used by the tests and allocate the keys[] and
 * found[] arrays (TOTAL_ENTRY elements each), publishing all three through
 * tbl_rw_test_param.
 *
 * use_ext, use_htm, rw_lf and use_jhash choose the hash function and the
 * extra_flag bits. The conditions that guard each assignment are not visible
 * here; presumably use_jhash picks rte_jhash over rte_hash_crc, use_htm adds
 * TRANS_MEM_SUPPORT, rw_lf picks RW_CONCURRENCY_LF over RW_CONCURRENCY and
 * use_ext sets or clears EXT_TABLE -- TODO confirm.
 *
 * Callers treat a non-zero return value as failure.
 */
124 init_params(int use_ext, int use_htm, int rw_lf, int use_jhash)
128 uint32_t *keys = NULL;
129 uint8_t *found = NULL;
130 struct rte_hash *handle;
/* Keys are plain uint32_t values; the table holds TOTAL_ENTRY entries. */
132 struct rte_hash_parameters hash_params = {
133 .entries = TOTAL_ENTRY,
134 .key_len = sizeof(uint32_t),
135 .hash_func_init_val = 0,
136 .socket_id = rte_socket_id(),
139 hash_params.hash_func = rte_jhash;
141 hash_params.hash_func = rte_hash_crc;
/* extra_flag is assigned here; the options that follow are OR-ed in or masked out. */
143 hash_params.extra_flag = RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD;
145 hash_params.extra_flag |=
146 RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT;
148 hash_params.extra_flag |=
149 RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF;
151 hash_params.extra_flag |=
152 RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY;
155 hash_params.extra_flag |=
156 RTE_HASH_EXTRA_FLAGS_EXT_TABLE;
158 hash_params.extra_flag &=
159 ~RTE_HASH_EXTRA_FLAGS_EXT_TABLE;
161 hash_params.name = "tests";
163 handle = rte_hash_create(&hash_params);
164 if (handle == NULL) {
165 printf("hash creation failed");
169 tbl_rw_test_param.h = handle;
/* keys[] is left uninitialised by rte_malloc; found[] starts zeroed. */
170 keys = rte_malloc(NULL, sizeof(uint32_t) * TOTAL_ENTRY, 0);
173 printf("RTE_MALLOC failed\n");
177 found = rte_zmalloc(NULL, sizeof(uint8_t) * TOTAL_ENTRY, 0);
179 printf("RTE_ZMALLOC failed\n");
183 tbl_rw_test_param.keys = keys;
184 tbl_rw_test_param.found = found;
/* The loop body is not visible here; presumably it fills keys[] -- TODO confirm. */
186 for (i = 0; i < TOTAL_ENTRY; i++)
/* Presumably part of the failure path that releases the table -- TODO confirm. */
193 rte_hash_free(handle);
/*
 * Run one functional read-write test for the given table configuration.
 *
 * Resets gcycles and ginsertions, builds the table with init_params(),
 * splits the insert count evenly over the worker lcores, launches
 * test_hash_readwrite_worker() and waits for it to finish. It then iterates
 * the table, counting in found[] how often each key appears, and reports
 * keys seen more than once (duplicated) or not at all (lost), ignoring
 * slots marked RTE_RWTEST_FAIL. Finally it prints the average cycles per
 * insertion and frees the table and both arrays.
 *
 * use_htm, use_rw_lf and use_ext are forwarded to init_params().
 * Callers treat a negative return value as failure.
 */
199 test_hash_readwrite_functional(int use_htm, int use_rw_lf, int use_ext)
202 const void *next_key;
206 uint32_t duplicated_keys = 0;
207 uint32_t lost_keys = 0;
/* All lcores except one act as workers. */
209 int worker_cnt = rte_lcore_count() - 1;
210 uint32_t tot_insert = 0;
212 __atomic_store_n(&gcycles, 0, __ATOMIC_RELAXED);
213 __atomic_store_n(&ginsertions, 0, __ATOMIC_RELAXED);
215 if (init_params(use_ext, use_htm, use_rw_lf, use_jhash) != 0)
/* The condition choosing between the two insert counts is not visible here. */
219 tot_insert = TOTAL_INSERT_EXT;
221 tot_insert = TOTAL_INSERT;
/* Integer division: each worker gets the same number of keys. */
223 tbl_rw_test_param.num_insert =
224 tot_insert / worker_cnt;
/* Keys actually handed out, after the division remainder is dropped. */
226 tbl_rw_test_param.rounded_tot_insert =
227 tbl_rw_test_param.num_insert * worker_cnt;
229 printf("\nHTM = %d, RW-LF = %d, EXT-Table = %d\n",
230 use_htm, use_rw_lf, use_ext);
231 printf("++++++++Start function tests:+++++++++\n");
233 /* Fire all threads. */
234 rte_eal_mp_remote_launch(test_hash_readwrite_worker,
236 rte_eal_mp_wait_lcore();
/* Walk every entry left in the table; the key value is used as the found[] index. */
238 while (rte_hash_iterate(tbl_rw_test_param.h, &next_key,
239 &next_data, &iter) >= 0) {
240 /* Search for the key in the list of keys added .*/
241 i = *(const uint32_t *)next_key;
242 tbl_rw_test_param.found[i]++;
/* Check every key that a worker did not mark as failed. */
245 for (i = 0; i < tbl_rw_test_param.rounded_tot_insert; i++) {
246 if (tbl_rw_test_param.keys[i] != RTE_RWTEST_FAIL) {
247 if (tbl_rw_test_param.found[i] > 1) {
251 if (tbl_rw_test_param.found[i] == 0) {
253 printf("key %d is lost\n", i);
259 if (duplicated_keys > 0) {
260 printf("%d key duplicated\n", duplicated_keys);
265 printf("%d key lost\n", lost_keys);
269 printf("No key corrupted during read-write test.\n");
/*
 * NOTE(review): divides by ginsertions, which stays zero if no worker got
 * past its first key; no guard is visible here -- confirm.
 */
271 unsigned long long int cycles_per_insertion =
272 __atomic_load_n(&gcycles, __ATOMIC_RELAXED) /
273 __atomic_load_n(&ginsertions, __ATOMIC_RELAXED);
275 printf("cycles per insertion and lookup: %llu\n", cycles_per_insertion);
/* Cleanup followed by the completion message. */
277 rte_free(tbl_rw_test_param.found);
278 rte_free(tbl_rw_test_param.keys);
279 rte_hash_free(tbl_rw_test_param.h);
280 printf("+++++++++Complete function tests+++++++++\n");
/* Second, identical cleanup; presumably the error path -- TODO confirm. */
284 rte_free(tbl_rw_test_param.found);
285 rte_free(tbl_rw_test_param.keys);
286 rte_hash_free(tbl_rw_test_param.h);
/*
 * Perf-test reader, run on a worker lcore.
 *
 * arg carries the number of lookups to perform, cast through uintptr_t.
 * Looks up keys[0] up to keys[read_cnt - 1] with rte_hash_lookup_data() and
 * compares the returned data pointer, taken as an integer, with the key
 * index; a mismatch is reported with printf. The elapsed TSC cycles and the
 * number of lookups walked are added to gread_cycles and greads.
 */
292 test_rw_reader(void *arg)
295 uint64_t begin, cycles;
296 uint64_t read_cnt = (uint64_t)((uintptr_t)arg);
298 begin = rte_rdtsc_precise();
299 for (i = 0; i < read_cnt; i++) {
301 rte_hash_lookup_data(tbl_rw_test_param.h,
302 tbl_rw_test_param.keys + i,
/* The data stored with each key is expected to equal its index. */
304 if (i != (uint64_t)(uintptr_t)data) {
305 printf("lookup find wrong value %"PRIu64","
307 (uint64_t)(uintptr_t)data);
312 cycles = rte_rdtsc_precise() - begin;
/* i is the loop counter at exit, i.e. the number of lookups walked. */
313 __atomic_fetch_add(&gread_cycles, cycles, __ATOMIC_RELAXED);
314 __atomic_fetch_add(&greads, i, __ATOMIC_RELAXED);
/*
 * Perf-test writer, run on a worker lcore.
 *
 * arg carries start_coreid, cast through uintptr_t: the worker index of the
 * first writer. The function finds its own index in worker_core_ids[],
 * derives a key range that starts at TOTAL_INSERT / 2 and is
 * tbl_rw_test_param.num_insert keys long, and adds each key with the key
 * index stored as its data. The elapsed TSC cycles go to gwrite_cycles and
 * num_insert is added to gwrites.
 */
319 test_rw_writer(void *arg)
322 uint32_t lcore_id = rte_lcore_id();
323 uint64_t begin, cycles;
325 uint64_t start_coreid = (uint64_t)(uintptr_t)arg;
/* Locate this lcore in worker_core_ids[]; i becomes the worker index. */
328 for (i = 0; i < rte_lcore_count(); i++) {
329 if (worker_core_ids[i] == lcore_id)
/* Writer number (i - start_coreid) owns its own num_insert-sized slice. */
333 offset = TOTAL_INSERT / 2 + (i - (start_coreid)) *
334 tbl_rw_test_param.num_insert;
335 begin = rte_rdtsc_precise();
336 for (i = offset; i < offset + tbl_rw_test_param.num_insert; i++) {
337 ret = rte_hash_add_key_data(tbl_rw_test_param.h,
338 tbl_rw_test_param.keys + i,
339 (void *)((uintptr_t)i));
341 printf("writer failed %"PRIu64"\n", i);
346 cycles = rte_rdtsc_precise() - begin;
/* The full num_insert is counted, not the loop counter at exit. */
347 __atomic_fetch_add(&gwrite_cycles, cycles, __ATOMIC_RELAXED);
348 __atomic_fetch_add(&gwrites, tbl_rw_test_param.num_insert,
/*
 * Measure hash table read and write cost and store the averages, in TSC
 * cycles per operation, in *perf_results.
 *
 * Steps visible here:
 *  - single thread: insert TOTAL_INSERT / 2 keys, then look up read_cnt
 *    keys (single_write, single_read);
 *  - for each core_cnt[n] that the available worker lcores allow: a
 *    readers-only run and a writers-only run (read_only[n], write_only[n]),
 *    then a combined readers-plus-writers run (read_write_r[n],
 *    read_write_w[n]) followed by a duplicated/lost key check.
 *
 * use_htm is forwarded to init_params(). The rest of the parameter list is
 * not visible here; presumably it includes the reader_faster switch declared
 * by the caller -- TODO confirm.
 */
354 test_hash_readwrite_perf(struct perf *perf_results, int use_htm,
360 uint64_t i, read_cnt;
362 const void *next_key;
367 uint32_t duplicated_keys = 0;
368 uint32_t lost_keys = 0;
370 uint64_t start = 0, end = 0;
/* Start from clean global counters. */
372 __atomic_store_n(&gwrites, 0, __ATOMIC_RELAXED);
373 __atomic_store_n(&greads, 0, __ATOMIC_RELAXED);
375 __atomic_store_n(&gread_cycles, 0, __ATOMIC_RELAXED);
376 __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
/* First and third arguments are 0: no extendable table, no lock-free mode. */
378 if (init_params(0, use_htm, 0, use_jhash) != 0)
382 * Do a readers finish faster or writers finish faster test.
383 * When readers finish faster, we timing the readers, and when writers
384 * finish faster, we timing the writers.
385 * Divided by 10 or 2 is just experimental values to vary the workload
389 printf("++++++Start perf test: reader++++++++\n");
390 read_cnt = TOTAL_INSERT / 10;
392 printf("++++++Start perf test: writer++++++++\n");
393 read_cnt = TOTAL_INSERT / 2;
396 /* We first test single thread performance */
397 start = rte_rdtsc_precise();
398 /* Insert half of the keys */
399 for (i = 0; i < TOTAL_INSERT / 2; i++) {
400 ret = rte_hash_add_key_data(tbl_rw_test_param.h,
401 tbl_rw_test_param.keys + i,
402 (void *)((uintptr_t)i));
404 printf("Failed to insert half of keys\n");
/* From here on, end holds an elapsed cycle count rather than a timestamp. */
408 end = rte_rdtsc_precise() - start;
409 perf_results->single_write = end / i;
411 start = rte_rdtsc_precise();
413 for (i = 0; i < read_cnt; i++) {
415 rte_hash_lookup_data(tbl_rw_test_param.h,
416 tbl_rw_test_param.keys + i,
418 if (i != (uint64_t)(uintptr_t)data) {
419 printf("lookup find wrong value"
420 " %"PRIu64",%"PRIu64"\n", i,
421 (uint64_t)(uintptr_t)data);
425 end = rte_rdtsc_precise() - start;
426 perf_results->single_read = end / i;
/* One round per entry of core_cnt[]. */
428 for (n = 0; n < NUM_TEST; n++) {
429 unsigned int tot_worker_lcore = rte_lcore_count() - 1;
/* A round needs core_cnt[n] readers plus core_cnt[n] writers. */
430 if (tot_worker_lcore < core_cnt[n] * 2)
433 __atomic_store_n(&greads, 0, __ATOMIC_RELAXED);
434 __atomic_store_n(&gread_cycles, 0, __ATOMIC_RELAXED);
435 __atomic_store_n(&gwrites, 0, __ATOMIC_RELAXED);
436 __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
438 rte_hash_reset(tbl_rw_test_param.h);
/* Each writer gets an equal share of the second half of the keys. */
440 tbl_rw_test_param.num_insert = TOTAL_INSERT / 2 / core_cnt[n];
441 tbl_rw_test_param.rounded_tot_insert = TOTAL_INSERT / 2 +
442 tbl_rw_test_param.num_insert *
/* Pre-populate the first half; the data stored with a key is its index. */
445 for (i = 0; i < TOTAL_INSERT / 2; i++) {
446 ret = rte_hash_add_key_data(tbl_rw_test_param.h,
447 tbl_rw_test_param.keys + i,
448 (void *)((uintptr_t)i));
450 printf("Failed to insert half of keys\n");
455 /* Then test multiple thread case but only all reads or
459 /* Test only reader cases */
460 for (i = 0; i < core_cnt[n]; i++)
461 rte_eal_remote_launch(test_rw_reader,
462 (void *)(uintptr_t)read_cnt,
465 rte_eal_mp_wait_lcore();
468 /* Test only writer cases */
/* i continues from the reader loop, so writers use the next core_cnt[n] workers. */
469 for (; i < core_cnt[n] * 2; i++)
470 rte_eal_remote_launch(test_rw_writer,
471 (void *)((uintptr_t)start_coreid),
474 rte_eal_mp_wait_lcore();
/* Averages for the readers-only and writers-only runs. */
477 unsigned long long int cycles_per_insertion =
478 __atomic_load_n(&gread_cycles, __ATOMIC_RELAXED) /
479 __atomic_load_n(&greads, __ATOMIC_RELAXED);
480 perf_results->read_only[n] = cycles_per_insertion;
481 printf("Reader only: cycles per lookup: %llu\n",
482 cycles_per_insertion);
486 unsigned long long int cycles_per_insertion =
487 __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) /
488 __atomic_load_n(&gwrites, __ATOMIC_RELAXED);
489 perf_results->write_only[n] = cycles_per_insertion;
490 printf("Writer only: cycles per writes: %llu\n",
491 cycles_per_insertion);
/* Reset counters and table contents before the combined run. */
494 __atomic_store_n(&greads, 0, __ATOMIC_RELAXED);
495 __atomic_store_n(&gread_cycles, 0, __ATOMIC_RELAXED);
496 __atomic_store_n(&gwrites, 0, __ATOMIC_RELAXED);
497 __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
499 rte_hash_reset(tbl_rw_test_param.h);
501 for (i = 0; i < TOTAL_INSERT / 2; i++) {
502 ret = rte_hash_add_key_data(tbl_rw_test_param.h,
503 tbl_rw_test_param.keys + i,
504 (void *)((uintptr_t)i));
506 printf("Failed to insert half of keys\n");
511 start_coreid = core_cnt[n];
/*
 * Two launch orders follow: writers then readers, or readers then writers.
 * The condition choosing between them is not visible here.
 */
514 for (i = core_cnt[n]; i < core_cnt[n] * 2; i++)
515 rte_eal_remote_launch(test_rw_writer,
516 (void *)((uintptr_t)start_coreid),
518 for (i = 0; i < core_cnt[n]; i++)
519 rte_eal_remote_launch(test_rw_reader,
520 (void *)(uintptr_t)read_cnt,
523 for (i = 0; i < core_cnt[n]; i++)
524 rte_eal_remote_launch(test_rw_reader,
525 (void *)(uintptr_t)read_cnt,
527 for (; i < core_cnt[n] * 2; i++)
528 rte_eal_remote_launch(test_rw_writer,
529 (void *)((uintptr_t)start_coreid),
533 rte_eal_mp_wait_lcore();
/* Recount how often each key appears in the table; found[] has one byte per entry. */
536 memset(tbl_rw_test_param.found, 0, TOTAL_ENTRY);
537 while (rte_hash_iterate(tbl_rw_test_param.h,
538 &next_key, &next_data, &iter) >= 0) {
539 /* Search for the key in the list of keys added .*/
540 i = *(const uint32_t *)next_key;
541 tbl_rw_test_param.found[i]++;
/* Check every key that was not marked as failed. */
544 for (i = 0; i < tbl_rw_test_param.rounded_tot_insert; i++) {
545 if (tbl_rw_test_param.keys[i] != RTE_RWTEST_FAIL) {
546 if (tbl_rw_test_param.found[i] > 1) {
550 if (tbl_rw_test_param.found[i] == 0) {
552 printf("key %"PRIu64" is lost\n", i);
558 if (duplicated_keys > 0) {
559 printf("%d key duplicated\n", duplicated_keys);
564 printf("%d key lost\n", lost_keys);
568 printf("No key corrupted during read-write test.\n");
/* Averages for the combined run. */
571 unsigned long long int cycles_per_insertion =
572 __atomic_load_n(&gread_cycles, __ATOMIC_RELAXED) /
573 __atomic_load_n(&greads, __ATOMIC_RELAXED);
574 perf_results->read_write_r[n] = cycles_per_insertion;
575 printf("Read-write cycles per lookup: %llu\n",
576 cycles_per_insertion);
580 unsigned long long int cycles_per_insertion =
581 __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) /
582 __atomic_load_n(&gwrites, __ATOMIC_RELAXED);
583 perf_results->read_write_w[n] = cycles_per_insertion;
584 printf("Read-write cycles per writes: %llu\n",
585 cycles_per_insertion);
/* Two identical cleanup sequences; presumably success and error paths -- TODO confirm. */
590 rte_free(tbl_rw_test_param.found);
591 rte_free(tbl_rw_test_param.keys);
592 rte_hash_free(tbl_rw_test_param.h);
596 rte_free(tbl_rw_test_param.found);
597 rte_free(tbl_rw_test_param.keys);
598 rte_hash_free(tbl_rw_test_param.h);
/*
 * Entry point of the perf autotest.
 *
 * Requires at least 3 lcores, records the worker lcore ids in
 * worker_core_ids[], runs test_hash_readwrite_perf() twice with results
 * stored in htm_results when rte_tm_supported() reports support, twice with
 * results stored in non_htm_results, and prints a summary of both.
 */
605 test_hash_rw_perf_main(void)
608 * Variables used to choose different tests.
609 * use_htm indicates if hardware transactional memory should be used.
610 * reader_faster indicates if the reader threads should finish earlier
611 * than writer threads. This is to timing either reader threads or
612 * writer threads for performance numbers.
614 int use_htm, reader_faster;
615 unsigned int i = 0, core_id = 0;
617 if (rte_lcore_count() < 3) {
618 printf("Not enough cores for hash_readwrite_autotest, expecting at least 3\n");
/* Record the id of every worker lcore; the workers look themselves up here. */
622 RTE_LCORE_FOREACH_WORKER(core_id) {
623 worker_core_ids[i] = core_id;
627 setlocale(LC_NUMERIC, "");
629 if (rte_tm_supported()) {
630 printf("Hardware transactional memory (lock elision) "
633 printf("Test read-write with Hardware transactional memory\n");
/* Two runs per mode; the arguments that differ between them are not visible here. */
638 if (test_hash_readwrite_perf(&htm_results, use_htm,
643 if (test_hash_readwrite_perf(&htm_results, use_htm,
647 printf("Hardware transactional memory (lock elision) "
648 "is NOT supported\n");
651 printf("Test read-write without Hardware transactional memory\n");
655 if (test_hash_readwrite_perf(&non_htm_results, use_htm,
659 if (test_hash_readwrite_perf(&non_htm_results, use_htm,
663 printf("================\n");
664 printf("Results summary:\n");
665 printf("================\n");
/* NOTE(review): only the htm_results single-thread figures are printed in the lines visible here. */
667 printf("single read: %u\n", htm_results.single_read);
668 printf("single write: %u\n", htm_results.single_write);
669 for (i = 0; i < NUM_TEST; i++) {
670 printf("+++ core_cnt: %u +++\n", core_cnt[i]);
672 printf(" read only: %u\n", htm_results.read_only[i]);
673 printf(" write only: %u\n", htm_results.write_only[i]);
674 printf(" read-write read: %u\n", htm_results.read_write_r[i]);
675 printf(" read-write write: %u\n", htm_results.read_write_w[i]);
677 printf("non HTM:\n");
678 printf(" read only: %u\n", non_htm_results.read_only[i]);
679 printf(" write only: %u\n", non_htm_results.write_only[i]);
680 printf(" read-write read: %u\n",
681 non_htm_results.read_write_r[i]);
682 printf(" read-write write: %u\n",
683 non_htm_results.read_write_w[i]);
/*
 * Entry point of the functional autotest.
 *
 * Requires at least 3 lcores, records the worker lcore ids in
 * worker_core_ids[], then calls test_hash_readwrite_functional() for every
 * combination of rw_lf and ext: with htm = 1 when rte_tm_supported()
 * reports support, and with htm = 0. A negative result from any call is
 * treated as failure; the statements taken on failure are not visible here.
 */
690 test_hash_rw_func_main(void)
693 * Variables used to choose different tests.
694 * use_htm indicates if hardware transactional memory should be used.
695 * reader_faster indicates if the reader threads should finish earlier
696 * than writer threads. This is to timing either reader threads or
697 * writer threads for performance numbers.
699 unsigned int i = 0, core_id = 0;
701 if (rte_lcore_count() < 3) {
702 printf("Not enough cores for hash_readwrite_autotest, expecting at least 3\n");
/* Record the id of every worker lcore; the workers look themselves up here. */
706 RTE_LCORE_FOREACH_WORKER(core_id) {
707 worker_core_ids[i] = core_id;
711 setlocale(LC_NUMERIC, "");
713 if (rte_tm_supported()) {
714 printf("Hardware transactional memory (lock elision) "
717 printf("Test read-write with Hardware transactional memory\n");
719 /* htm = 1, rw_lf = 0, ext = 0 */
720 if (test_hash_readwrite_functional(1, 0, 0) < 0)
723 /* htm = 1, rw_lf = 1, ext = 0 */
724 if (test_hash_readwrite_functional(1, 1, 0) < 0)
727 /* htm = 1, rw_lf = 0, ext = 1 */
728 if (test_hash_readwrite_functional(1, 0, 1) < 0)
731 /* htm = 1, rw_lf = 1, ext = 1 */
732 if (test_hash_readwrite_functional(1, 1, 1) < 0)
735 printf("Hardware transactional memory (lock elision) "
736 "is NOT supported\n");
739 printf("Test read-write without Hardware transactional memory\n");
740 /* htm = 0, rw_lf = 0, ext = 0 */
741 if (test_hash_readwrite_functional(0, 0, 0) < 0)
744 /* htm = 0, rw_lf = 1, ext = 0 */
745 if (test_hash_readwrite_functional(0, 1, 0) < 0)
748 /* htm = 0, rw_lf = 0, ext = 1 */
749 if (test_hash_readwrite_functional(0, 0, 1) < 0)
752 /* htm = 0, rw_lf = 1, ext = 1 */
753 if (test_hash_readwrite_functional(0, 1, 1) < 0)
/* Register the functional and perf entry points under their autotest command names. */
759 REGISTER_TEST_COMMAND(hash_readwrite_func_autotest, test_hash_rw_func_main);
760 REGISTER_TEST_COMMAND(hash_readwrite_perf_autotest, test_hash_rw_perf_main);