1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2018 Intel Corporation
8 #include <rte_cycles.h>
10 #include <rte_hash_crc.h>
11 #include <rte_jhash.h>
12 #include <rte_launch.h>
13 #include <rte_malloc.h>
14 #include <rte_random.h>
15 #include <rte_spinlock.h>
/*
 * Marker written over keys[] slots that a worker did not reach; the
 * post-run checks skip entries equal to this value.
 */
10 #define RTE_RWTEST_FAIL 0
/*
 * Entry count requested from rte_hash_create() and element count of the
 * keys[] / found[] arrays.
 */
21 #define TOTAL_ENTRY (5*1024*1024)
/*
 * Insert total used by the functional test (one of two choices) and, halved,
 * by the perf test.
 * NOTE(review): the 4.5 factor makes this a double constant, so loop bounds
 * and divisions written with it are evaluated in floating point.
 */
22 #define TOTAL_INSERT (4.5*1024*1024)
/*
 * Alternative insert total for the functional test; presumably the
 * extendable-table case -- the selecting condition is not visible here.
 */
23 #define TOTAL_INSERT_EXT (5*1024*1024)
/*
 * Per-case thread count for the perf test: core_cnt[n] readers and the same
 * number of writers are launched for case n.
 */
26 unsigned int core_cnt[NUM_TEST] = {2, 4, 8};
/*
 * Worker lcore ids, filled by the *_main() entry points; worker functions
 * search it for their own lcore id to derive a per-worker index.
 */
28 unsigned int worker_core_ids[RTE_MAX_LCORE];
/*
 * The following fields are read through htm_results / non_htm_results
 * (struct perf); the enclosing declaration starts on a line not visible
 * here. Each holds a cycles-per-operation figure computed by the perf test.
 */
31 uint32_t single_write;
32 uint32_t read_only[NUM_TEST];
33 uint32_t write_only[NUM_TEST];
34 uint32_t read_write_r[NUM_TEST];
35 uint32_t read_write_w[NUM_TEST];
/* Perf results for the runs made with and without use_htm. */
38 static struct perf htm_results, non_htm_results;
/*
 * Accessed as tbl_rw_test_param.rounded_tot_insert: upper bound of the key
 * indices examined by the post-run lost/duplicate checks.
 */
44 uint32_t rounded_tot_insert;
/* Functional test: summed TSC cycles and iteration counts of all workers. */
48 static rte_atomic64_t gcycles;
49 static rte_atomic64_t ginsertions;
/* Perf test: summed TSC cycles spent by reader and writer threads. */
51 static rte_atomic64_t gread_cycles;
52 static rte_atomic64_t gwrite_cycles;
/* Perf test: summed operation counts of reader and writer threads. */
54 static rte_atomic64_t greads;
55 static rte_atomic64_t gwrites;
/*
 * Per-lcore worker of the functional test.
 *
 * Locates this lcore in worker_core_ids[] and uses the index to select a
 * num_insert-sized slice of tbl_rw_test_param.keys. For every key of the
 * slice it does a lookup and an add, then runs lookup / delete / add /
 * lookup on a randomly chosen key. The elapsed TSC cycles and the number of
 * iterations reached are added to gcycles / ginsertions, and any slice
 * entries past the point reached are overwritten with RTE_RWTEST_FAIL.
 *
 * arg is unused. The statements guarded by the visible conditions are on
 * lines not shown in this view.
 */
58 test_hash_readwrite_worker(__rte_unused void *arg)
61 uint32_t lcore_id = rte_lcore_id();
62 uint64_t begin, cycles;
/*
 * One return value per key of this worker's slice.
 * NOTE(review): no NULL check of the rte_malloc() result is visible before
 * ret[] is written below.
 */
65 ret = rte_malloc(NULL, sizeof(int) *
66 tbl_rw_test_param.num_insert, 0);
/* Find this lcore's position among the recorded worker lcores. */
67 for (i = 0; i < rte_lcore_count(); i++) {
68 if (worker_core_ids[i] == lcore_id)
/* First key index of the slice owned by this worker. */
71 offset = tbl_rw_test_param.num_insert * i;
73 printf("Core #%d inserting and reading %d: %'"PRId64" - %'"PRId64"\n",
74 lcore_id, tbl_rw_test_param.num_insert,
75 offset, offset + tbl_rw_test_param.num_insert - 1);
77 begin = rte_rdtsc_precise();
79 for (i = offset; i < offset + tbl_rw_test_param.num_insert; i++) {
/* Check whether the key about to be added is already reported present. */
81 if (rte_hash_lookup(tbl_rw_test_param.h,
82 tbl_rw_test_param.keys + i) > 0)
/* Add the key and remember the returned value, indexed slice-relative. */
85 ret[i - offset] = rte_hash_add_key(tbl_rw_test_param.h,
86 tbl_rw_test_param.keys + i);
87 if (ret[i - offset] < 0)
/*
 * NOTE(review): rand is slice-relative (0 .. i - offset) and indexes ret[]
 * that way, but the key is addressed as keys + rand, without offset.
 * Confirm whether keys + offset + rand is intended.
 */
90 /* lookup a random key */
91 uint32_t rand = rte_rand() % (i + 1 - offset);
93 if (rte_hash_lookup(tbl_rw_test_param.h,
94 tbl_rw_test_param.keys + rand) != ret[rand])
/* Delete that key; the result is compared with the stored add result. */
98 if (rte_hash_del_key(tbl_rw_test_param.h,
99 tbl_rw_test_param.keys + rand) != ret[rand])
/* Re-add the key and store the new return value. */
102 ret[rand] = rte_hash_add_key(tbl_rw_test_param.h,
103 tbl_rw_test_param.keys + rand);
/* Look the key up again and compare with the value from the re-add. */
107 if (rte_hash_lookup(tbl_rw_test_param.h,
108 tbl_rw_test_param.keys + rand) != ret[rand])
/* Publish this worker's cycle count and the number of iterations reached. */
112 cycles = rte_rdtsc_precise() - begin;
113 rte_atomic64_add(&gcycles, cycles);
114 rte_atomic64_add(&ginsertions, i - offset);
/* Mark the rest of the slice so the post-run checks ignore those keys. */
116 for (; i < offset + tbl_rw_test_param.num_insert; i++)
117 tbl_rw_test_param.keys[i] = RTE_RWTEST_FAIL;
/*
 * Create the hash table and the keys[] / found[] arrays used by the tests
 * and publish them through tbl_rw_test_param.
 *
 * The four int parameters choose the hash function and the extra flags; the
 * conditions that map each parameter to a flag are on lines not visible
 * here. The caller compares the return value with 0.
 */
124 init_params(int use_ext, int use_htm, int rw_lf, int use_jhash)
128 uint32_t *keys = NULL;
129 uint8_t *found = NULL;
130 struct rte_hash *handle;
/* Table of TOTAL_ENTRY 32-bit keys on the calling lcore's socket. */
132 struct rte_hash_parameters hash_params = {
133 .entries = TOTAL_ENTRY,
134 .key_len = sizeof(uint32_t),
135 .hash_func_init_val = 0,
136 .socket_id = rte_socket_id(),
/* Hash function: jhash or CRC (presumably selected by use_jhash). */
139 hash_params.hash_func = rte_jhash;
141 hash_params.hash_func = rte_hash_crc;
/* Multi-writer add is always requested; further flags are OR-ed in below. */
143 hash_params.extra_flag = RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD;
145 hash_params.extra_flag |=
146 RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT;
148 hash_params.extra_flag |=
149 RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF;
151 hash_params.extra_flag |=
152 RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY;
155 hash_params.extra_flag |=
156 RTE_HASH_EXTRA_FLAGS_EXT_TABLE;
158 hash_params.extra_flag &=
159 ~RTE_HASH_EXTRA_FLAGS_EXT_TABLE;
161 hash_params.name = "tests";
163 handle = rte_hash_create(&hash_params);
/*
 * NOTE(review): this message has no trailing newline, unlike the two
 * allocation-failure messages below.
 */
164 if (handle == NULL) {
165 printf("hash creation failed");
169 tbl_rw_test_param.h = handle;
/* One 32-bit key per table entry. */
170 keys = rte_malloc(NULL, sizeof(uint32_t) * TOTAL_ENTRY, 0);
173 printf("RTE_MALLOC failed\n");
/* One zero-initialised hit counter per key, filled while iterating the table. */
177 found = rte_zmalloc(NULL, sizeof(uint8_t) * TOTAL_ENTRY, 0);
179 printf("RTE_ZMALLOC failed\n");
183 tbl_rw_test_param.keys = keys;
184 tbl_rw_test_param.found = found;
/* Per-key initialisation; the loop body is not visible in this view. */
186 for (i = 0; i < TOTAL_ENTRY; i++)
/* Table release; presumably part of the failure path -- TODO confirm. */
193 rte_hash_free(handle);
/*
 * Functional read/write test.
 *
 * Builds the table with init_params(), runs test_hash_readwrite_worker on
 * the lcores, then iterates the table to count how many times each key is
 * present. Keys not marked RTE_RWTEST_FAIL are reported as duplicated when
 * found more than once and as lost when not found. Finally prints the
 * average cycles per iteration and frees the table and both arrays.
 *
 * The *_main() caller treats a negative return value as failure.
 */
199 test_hash_readwrite_functional(int use_htm, int use_rw_lf, int use_ext)
202 const void *next_key;
206 uint32_t duplicated_keys = 0;
207 uint32_t lost_keys = 0;
/* All lcores but one (rte_lcore_count() - 1) share the inserts. */
209 int worker_cnt = rte_lcore_count() - 1;
210 uint32_t tot_insert = 0;
/* Reset the cycle and iteration accumulators shared with the workers. */
212 rte_atomic64_init(&gcycles);
213 rte_atomic64_clear(&gcycles);
215 rte_atomic64_init(&ginsertions);
216 rte_atomic64_clear(&ginsertions);
218 if (init_params(use_ext, use_htm, use_rw_lf, use_jhash) != 0)
/* Insert total: one of the two macros; the condition is not visible here. */
222 tot_insert = TOTAL_INSERT_EXT;
224 tot_insert = TOTAL_INSERT;
/* Keys per worker (integer division). */
226 tbl_rw_test_param.num_insert =
227 tot_insert / worker_cnt;
/* Total actually covered by the workers after the division above. */
229 tbl_rw_test_param.rounded_tot_insert =
230 tbl_rw_test_param.num_insert * worker_cnt;
232 printf("\nHTM = %d, RW-LF = %d, EXT-Table = %d\n",
233 use_htm, use_rw_lf, use_ext);
234 printf("++++++++Start function tests:+++++++++\n");
236 /* Fire all threads. */
237 rte_eal_mp_remote_launch(test_hash_readwrite_worker,
239 rte_eal_mp_wait_lcore();
/* Walk every entry left in the table and count hits per key value. */
241 while (rte_hash_iterate(tbl_rw_test_param.h, &next_key,
242 &next_data, &iter) >= 0) {
243 /* Search for the key in the list of keys added. */
244 i = *(const uint32_t *)next_key;
245 tbl_rw_test_param.found[i]++;
/* Examine every key the workers did not mark as failed. */
248 for (i = 0; i < tbl_rw_test_param.rounded_tot_insert; i++) {
249 if (tbl_rw_test_param.keys[i] != RTE_RWTEST_FAIL) {
250 if (tbl_rw_test_param.found[i] > 1) {
254 if (tbl_rw_test_param.found[i] == 0) {
256 printf("key %d is lost\n", i);
/* NOTE(review): both counters are uint32_t but are printed with %d. */
262 if (duplicated_keys > 0) {
263 printf("%d key duplicated\n", duplicated_keys);
268 printf("%d key lost\n", lost_keys);
272 printf("No key corrupted during read-write test.\n");
/*
 * NOTE(review): no guard against ginsertions being zero is visible before
 * this division.
 */
274 unsigned long long int cycles_per_insertion =
275 rte_atomic64_read(&gcycles) /
276 rte_atomic64_read(&ginsertions);
278 printf("cycles per insertion and lookup: %llu\n", cycles_per_insertion);
/* Release the arrays and the table. */
280 rte_free(tbl_rw_test_param.found);
281 rte_free(tbl_rw_test_param.keys);
282 rte_hash_free(tbl_rw_test_param.h);
283 printf("+++++++++Complete function tests+++++++++\n");
/* Second, identical release sequence; presumably the failure path -- TODO confirm. */
287 rte_free(tbl_rw_test_param.found);
288 rte_free(tbl_rw_test_param.keys);
289 rte_hash_free(tbl_rw_test_param.h);
/*
 * Reader thread of the perf test.
 *
 * arg carries the number of lookups to perform, passed as an integer cast
 * to a pointer. Looks up keys[0 .. read_cnt) with rte_hash_lookup_data() and
 * compares the returned data pointer, taken as an integer, with the key
 * index; a mismatch is printed. The elapsed TSC cycles and the number of
 * lookups reached are added to gread_cycles / greads.
 */
295 test_rw_reader(void *arg)
298 uint64_t begin, cycles;
299 uint64_t read_cnt = (uint64_t)((uintptr_t)arg);
301 begin = rte_rdtsc_precise();
302 for (i = 0; i < read_cnt; i++) {
304 rte_hash_lookup_data(tbl_rw_test_param.h,
305 tbl_rw_test_param.keys + i,
/* The stored data is expected to equal the key index. */
307 if (i != (uint64_t)(uintptr_t)data) {
308 printf("lookup find wrong value %"PRIu64","
310 (uint64_t)(uintptr_t)data);
/* Publish this reader's cycle count and lookup count. */
315 cycles = rte_rdtsc_precise() - begin;
316 rte_atomic64_add(&gread_cycles, cycles);
317 rte_atomic64_add(&greads, i);
/*
 * Writer thread of the perf test.
 *
 * arg carries start_coreid, the worker index of the first writer, passed as
 * an integer cast to a pointer. The thread finds its own index in
 * worker_core_ids[], derives a key range that starts after the first
 * TOTAL_INSERT / 2 keys, and adds num_insert keys with the key index as
 * data. The elapsed TSC cycles and num_insert are added to gwrite_cycles /
 * gwrites.
 */
322 test_rw_writer(void *arg)
325 uint32_t lcore_id = rte_lcore_id();
326 uint64_t begin, cycles;
328 uint64_t start_coreid = (uint64_t)(uintptr_t)arg;
/* Find this lcore's position among the recorded worker lcores. */
331 for (i = 0; i < rte_lcore_count(); i++) {
332 if (worker_core_ids[i] == lcore_id)
/* Skip the first half of the keys, then one num_insert slice per writer. */
336 offset = TOTAL_INSERT / 2 + (i - (start_coreid)) *
337 tbl_rw_test_param.num_insert;
338 begin = rte_rdtsc_precise();
339 for (i = offset; i < offset + tbl_rw_test_param.num_insert; i++) {
340 ret = rte_hash_add_key_data(tbl_rw_test_param.h,
341 tbl_rw_test_param.keys + i,
342 (void *)((uintptr_t)i));
344 printf("writer failed %"PRIu64"\n", i);
/*
 * NOTE(review): gwrites is credited with num_insert rather than the number
 * of adds reached; confirm this is intended if the loop can stop early.
 */
349 cycles = rte_rdtsc_precise() - begin;
350 rte_atomic64_add(&gwrite_cycles, cycles);
351 rte_atomic64_add(&gwrites, tbl_rw_test_param.num_insert);
/*
 * Read/write performance test.
 *
 * Measures single-thread insert and lookup cost first. Then, for each
 * core_cnt[] case, it measures reader-only and writer-only runs followed by
 * a combined reader + writer run, verifies that no key was lost or
 * duplicated, and stores the cycles-per-operation figures in *perf_results.
 *
 * The rest of the parameter list is on lines not visible in this view.
 * NOTE(review): TOTAL_INSERT is a double constant, so the "TOTAL_INSERT / 2"
 * and "/ 10" bounds below are floating-point expressions.
 */
356 test_hash_readwrite_perf(struct perf *perf_results, int use_htm,
362 uint64_t i, read_cnt;
364 const void *next_key;
369 uint32_t duplicated_keys = 0;
370 uint32_t lost_keys = 0;
372 uint64_t start = 0, end = 0;
/* Reset the accumulators shared with the reader and writer threads. */
374 rte_atomic64_init(&greads);
375 rte_atomic64_init(&gwrites);
376 rte_atomic64_clear(&gwrites);
377 rte_atomic64_clear(&greads);
379 rte_atomic64_init(&gread_cycles);
380 rte_atomic64_clear(&gread_cycles);
381 rte_atomic64_init(&gwrite_cycles);
382 rte_atomic64_clear(&gwrite_cycles);
/* Table is created with use_ext = 0 and rw_lf = 0. */
384 if (init_params(0, use_htm, 0, use_jhash) != 0)
388 * Do a readers finish faster or writers finish faster test.
389 * When readers finish faster, we timing the readers, and when writers
390 * finish faster, we timing the writers.
391 * Divided by 10 or 2 is just experimental values to vary the workload
395 printf("++++++Start perf test: reader++++++++\n");
/* Reader-timed variant: each reader looks up a tenth of TOTAL_INSERT keys. */
396 read_cnt = TOTAL_INSERT / 10;
398 printf("++++++Start perf test: writer++++++++\n");
/* Writer-timed variant: each reader looks up half of TOTAL_INSERT keys. */
399 read_cnt = TOTAL_INSERT / 2;
402 /* We first test single thread performance */
403 start = rte_rdtsc_precise();
404 /* Insert half of the keys */
405 for (i = 0; i < TOTAL_INSERT / 2; i++) {
406 ret = rte_hash_add_key_data(tbl_rw_test_param.h,
407 tbl_rw_test_param.keys + i,
408 (void *)((uintptr_t)i));
410 printf("Failed to insert half of keys\n");
/* Average cycles per single-thread insert. */
414 end = rte_rdtsc_precise() - start;
415 perf_results->single_write = end / i;
417 start = rte_rdtsc_precise();
419 for (i = 0; i < read_cnt; i++) {
421 rte_hash_lookup_data(tbl_rw_test_param.h,
422 tbl_rw_test_param.keys + i,
424 if (i != (uint64_t)(uintptr_t)data) {
425 printf("lookup find wrong value"
426 " %"PRIu64",%"PRIu64"\n", i,
427 (uint64_t)(uintptr_t)data);
/* Average cycles per single-thread lookup. */
431 end = rte_rdtsc_precise() - start;
432 perf_results->single_read = end / i;
/* One pass per entry of core_cnt[]. */
434 for (n = 0; n < NUM_TEST; n++) {
435 unsigned int tot_worker_lcore = rte_lcore_count() - 1;
/* Each case needs core_cnt[n] readers plus core_cnt[n] writers. */
436 if (tot_worker_lcore < core_cnt[n] * 2)
439 rte_atomic64_clear(&greads);
440 rte_atomic64_clear(&gread_cycles);
441 rte_atomic64_clear(&gwrites);
442 rte_atomic64_clear(&gwrite_cycles);
/* Start each case from an empty table. */
444 rte_hash_reset(tbl_rw_test_param.h);
/* Keys per writer, and the upper bound used by the post-run checks. */
446 tbl_rw_test_param.num_insert = TOTAL_INSERT / 2 / core_cnt[n];
447 tbl_rw_test_param.rounded_tot_insert = TOTAL_INSERT / 2 +
448 tbl_rw_test_param.num_insert *
/* Pre-populate the first half of the keys for the readers. */
451 for (i = 0; i < TOTAL_INSERT / 2; i++) {
452 ret = rte_hash_add_key_data(tbl_rw_test_param.h,
453 tbl_rw_test_param.keys + i,
454 (void *)((uintptr_t)i));
456 printf("Failed to insert half of keys\n");
461 /* Then test multiple thread case but only all reads or
465 /* Test only reader cases */
466 for (i = 0; i < core_cnt[n]; i++)
467 rte_eal_remote_launch(test_rw_reader,
468 (void *)(uintptr_t)read_cnt,
471 rte_eal_mp_wait_lcore();
474 /* Test only writer cases */
475 for (; i < core_cnt[n] * 2; i++)
476 rte_eal_remote_launch(test_rw_writer,
477 (void *)((uintptr_t)start_coreid),
480 rte_eal_mp_wait_lcore();
/*
 * NOTE(review): no guard against a zero greads / gwrites count is visible
 * before the divisions below.
 */
483 unsigned long long int cycles_per_insertion =
484 rte_atomic64_read(&gread_cycles) /
485 rte_atomic64_read(&greads);
486 perf_results->read_only[n] = cycles_per_insertion;
487 printf("Reader only: cycles per lookup: %llu\n",
488 cycles_per_insertion);
492 unsigned long long int cycles_per_insertion =
493 rte_atomic64_read(&gwrite_cycles) /
494 rte_atomic64_read(&gwrites);
495 perf_results->write_only[n] = cycles_per_insertion;
496 printf("Writer only: cycles per writes: %llu\n",
497 cycles_per_insertion);
/* Reset counters and table before the combined reader + writer run. */
500 rte_atomic64_clear(&greads);
501 rte_atomic64_clear(&gread_cycles);
502 rte_atomic64_clear(&gwrites);
503 rte_atomic64_clear(&gwrite_cycles);
505 rte_hash_reset(tbl_rw_test_param.h);
507 for (i = 0; i < TOTAL_INSERT / 2; i++) {
508 ret = rte_hash_add_key_data(tbl_rw_test_param.h,
509 tbl_rw_test_param.keys + i,
510 (void *)((uintptr_t)i));
512 printf("Failed to insert half of keys\n");
/* Writers occupy worker indices core_cnt[n] .. 2 * core_cnt[n] - 1. */
517 start_coreid = core_cnt[n];
/*
 * Two launch orders follow: writers then readers, and readers then writers.
 * The condition choosing between them is not visible in this view.
 */
520 for (i = core_cnt[n]; i < core_cnt[n] * 2; i++)
521 rte_eal_remote_launch(test_rw_writer,
522 (void *)((uintptr_t)start_coreid),
524 for (i = 0; i < core_cnt[n]; i++)
525 rte_eal_remote_launch(test_rw_reader,
526 (void *)(uintptr_t)read_cnt,
529 for (i = 0; i < core_cnt[n]; i++)
530 rte_eal_remote_launch(test_rw_reader,
531 (void *)(uintptr_t)read_cnt,
533 for (; i < core_cnt[n] * 2; i++)
534 rte_eal_remote_launch(test_rw_writer,
535 (void *)((uintptr_t)start_coreid),
539 rte_eal_mp_wait_lcore();
/* Recount, from scratch, how many times each key is present in the table. */
542 memset(tbl_rw_test_param.found, 0, TOTAL_ENTRY);
543 while (rte_hash_iterate(tbl_rw_test_param.h,
544 &next_key, &next_data, &iter) >= 0) {
545 /* Search for the key in the list of keys added. */
546 i = *(const uint32_t *)next_key;
547 tbl_rw_test_param.found[i]++;
/* Examine every key not marked as failed. */
550 for (i = 0; i < tbl_rw_test_param.rounded_tot_insert; i++) {
551 if (tbl_rw_test_param.keys[i] != RTE_RWTEST_FAIL) {
552 if (tbl_rw_test_param.found[i] > 1) {
556 if (tbl_rw_test_param.found[i] == 0) {
558 printf("key %"PRIu64" is lost\n", i);
564 if (duplicated_keys > 0) {
565 printf("%d key duplicated\n", duplicated_keys);
570 printf("%d key lost\n", lost_keys);
574 printf("No key corrupted during read-write test.\n");
/* Cycles per operation of the combined run, for readers and for writers. */
577 unsigned long long int cycles_per_insertion =
578 rte_atomic64_read(&gread_cycles) /
579 rte_atomic64_read(&greads);
580 perf_results->read_write_r[n] = cycles_per_insertion;
581 printf("Read-write cycles per lookup: %llu\n",
582 cycles_per_insertion);
586 unsigned long long int cycles_per_insertion =
587 rte_atomic64_read(&gwrite_cycles) /
588 rte_atomic64_read(&gwrites);
589 perf_results->read_write_w[n] = cycles_per_insertion;
590 printf("Read-write cycles per writes: %llu\n",
591 cycles_per_insertion);
/* Release the arrays and the table. */
596 rte_free(tbl_rw_test_param.found);
597 rte_free(tbl_rw_test_param.keys);
598 rte_hash_free(tbl_rw_test_param.h);
/* Second, identical release sequence; presumably the failure path -- TODO confirm. */
602 rte_free(tbl_rw_test_param.found);
603 rte_free(tbl_rw_test_param.keys);
604 rte_hash_free(tbl_rw_test_param.h);
/*
 * Entry point registered as hash_readwrite_perf_autotest.
 *
 * Requires at least three lcores, records the worker lcore ids, runs
 * test_hash_readwrite_perf() into htm_results when rte_tm_supported()
 * reports support and into non_htm_results for the non-HTM case, then
 * prints a summary of both result sets per core_cnt[] entry.
 */
611 test_hash_rw_perf_main(void)
614 * Variables used to choose different tests.
615 * use_htm indicates if hardware transactional memory should be used.
616 * reader_faster indicates if the reader threads should finish earlier
617 * than writer threads. This is to timing either reader threads or
618 * writer threads for performance numbers.
620 int use_htm, reader_faster;
621 unsigned int i = 0, core_id = 0;
623 if (rte_lcore_count() < 3) {
624 printf("Not enough cores for hash_readwrite_autotest, expecting at least 3\n");
/* Record every worker lcore id for the index lookups done by the threads. */
628 RTE_LCORE_FOREACH_WORKER(core_id) {
629 worker_core_ids[i] = core_id;
/* Use the environment's numeric locale for the printf output that follows. */
633 setlocale(LC_NUMERIC, "");
635 if (rte_tm_supported()) {
636 printf("Hardware transactional memory (lock elision) "
639 printf("Test read-write with Hardware transactional memory\n");
/* Two HTM runs; the remaining arguments are on lines not visible here. */
644 if (test_hash_readwrite_perf(&htm_results, use_htm,
649 if (test_hash_readwrite_perf(&htm_results, use_htm,
653 printf("Hardware transactional memory (lock elision) "
654 "is NOT supported\n");
657 printf("Test read-write without Hardware transactional memory\n");
/* Two non-HTM runs; the remaining arguments are on lines not visible here. */
661 if (test_hash_readwrite_perf(&non_htm_results, use_htm,
665 if (test_hash_readwrite_perf(&non_htm_results, use_htm,
669 printf("================\n");
670 printf("Results summary:\n");
671 printf("================\n");
/*
 * NOTE(review): the single-thread figures are printed from htm_results
 * only; no non_htm_results counterpart is visible in this view.
 */
673 printf("single read: %u\n", htm_results.single_read);
674 printf("single write: %u\n", htm_results.single_write);
675 for (i = 0; i < NUM_TEST; i++) {
676 printf("+++ core_cnt: %u +++\n", core_cnt[i]);
678 printf(" read only: %u\n", htm_results.read_only[i]);
679 printf(" write only: %u\n", htm_results.write_only[i]);
680 printf(" read-write read: %u\n", htm_results.read_write_r[i]);
681 printf(" read-write write: %u\n", htm_results.read_write_w[i]);
683 printf("non HTM:\n");
684 printf(" read only: %u\n", non_htm_results.read_only[i]);
685 printf(" write only: %u\n", non_htm_results.write_only[i]);
686 printf(" read-write read: %u\n",
687 non_htm_results.read_write_r[i]);
688 printf(" read-write write: %u\n",
689 non_htm_results.read_write_w[i]);
/*
 * Entry point registered as hash_readwrite_func_autotest.
 *
 * Requires at least three lcores, records the worker lcore ids, and runs
 * test_hash_readwrite_functional() for every rw_lf / ext combination: with
 * htm = 1 when rte_tm_supported() reports support, and with htm = 0. A
 * negative result from any run is treated as failure (the handling
 * statements are not visible in this view).
 *
 * NOTE(review): the comment text below mentions use_htm and reader_faster,
 * which are not declared in the visible lines of this function.
 */
696 test_hash_rw_func_main(void)
699 * Variables used to choose different tests.
700 * use_htm indicates if hardware transactional memory should be used.
701 * reader_faster indicates if the reader threads should finish earlier
702 * than writer threads. This is to timing either reader threads or
703 * writer threads for performance numbers.
705 unsigned int i = 0, core_id = 0;
707 if (rte_lcore_count() < 3) {
708 printf("Not enough cores for hash_readwrite_autotest, expecting at least 3\n");
/* Record every worker lcore id for the index lookup done by the workers. */
712 RTE_LCORE_FOREACH_WORKER(core_id) {
713 worker_core_ids[i] = core_id;
/* Use the environment's numeric locale for the printf output that follows. */
717 setlocale(LC_NUMERIC, "");
719 if (rte_tm_supported()) {
720 printf("Hardware transactional memory (lock elision) "
723 printf("Test read-write with Hardware transactional memory\n");
725 /* htm = 1, rw_lf = 0, ext = 0 */
726 if (test_hash_readwrite_functional(1, 0, 0) < 0)
729 /* htm = 1, rw_lf = 1, ext = 0 */
730 if (test_hash_readwrite_functional(1, 1, 0) < 0)
733 /* htm = 1, rw_lf = 0, ext = 1 */
734 if (test_hash_readwrite_functional(1, 0, 1) < 0)
737 /* htm = 1, rw_lf = 1, ext = 1 */
738 if (test_hash_readwrite_functional(1, 1, 1) < 0)
741 printf("Hardware transactional memory (lock elision) "
742 "is NOT supported\n");
745 printf("Test read-write without Hardware transactional memory\n");
746 /* htm = 0, rw_lf = 0, ext = 0 */
747 if (test_hash_readwrite_functional(0, 0, 0) < 0)
750 /* htm = 0, rw_lf = 1, ext = 0 */
751 if (test_hash_readwrite_functional(0, 1, 0) < 0)
754 /* htm = 0, rw_lf = 0, ext = 1 */
755 if (test_hash_readwrite_functional(0, 0, 1) < 0)
758 /* htm = 0, rw_lf = 1, ext = 1 */
759 if (test_hash_readwrite_functional(0, 1, 1) < 0)
/*
 * Bind the two entry points to the command names hash_readwrite_func_autotest
 * and hash_readwrite_perf_autotest. REGISTER_TEST_COMMAND is defined outside
 * this file; presumably it registers them with the test application.
 */
765 REGISTER_TEST_COMMAND(hash_readwrite_func_autotest, test_hash_rw_func_main);
766 REGISTER_TEST_COMMAND(hash_readwrite_perf_autotest, test_hash_rw_perf_main);