1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2018 Intel Corporation
8 #include <rte_cycles.h>
10 #include <rte_hash_crc.h>
11 #include <rte_jhash.h>
12 #include <rte_launch.h>
13 #include <rte_malloc.h>
14 #include <rte_random.h>
15 #include <rte_spinlock.h>
/* Marker stored in a keys[] slot by the functional worker for entries it
 * did not process; the verification loops skip slots equal to this value.
 */
19 #define RTE_RWTEST_FAIL 0
/* Capacity requested for the hash table and length of the keys/found arrays. */
21 #define TOTAL_ENTRY (16*1024*1024)
/* Key budget for the tests: the functional test splits it across worker
 * lcores; the perf test pre-inserts half of it and leaves the remainder to
 * the writer threads.
 */
22 #define TOTAL_INSERT (15*1024*1024)
/* Thread counts exercised by the perf test: run n launches core_cnt[n]
 * readers and/or core_cnt[n] writers. NUM_TEST is defined outside this view.
 */
25 unsigned int core_cnt[NUM_TEST] = {2, 4, 8};
/* Worker lcore IDs, filled by test_hash_readwrite_main; worker functions
 * search it for their own lcore ID to derive a per-thread index.
 */
27 unsigned int slave_core_ids[RTE_MAX_LCORE];
/* The next five fields are accessed through struct perf pointers in
 * test_hash_readwrite_perf and hold cycles-per-operation figures. The
 * struct's opening and closing lines are not part of this listing.
 */
/* Single-thread cycles per insert. */
30 uint32_t single_write;
/* Per core_cnt[] entry: cycles per lookup with only readers running. */
31 uint32_t read_only[NUM_TEST];
/* Per core_cnt[] entry: cycles per insert with only writers running. */
32 uint32_t write_only[NUM_TEST];
/* Per core_cnt[] entry: cycles per lookup while writers run concurrently. */
33 uint32_t read_write_r[NUM_TEST];
/* Per core_cnt[] entry: cycles per insert while readers run concurrently. */
34 uint32_t read_write_w[NUM_TEST];
/* Result sets passed to the perf test by test_hash_readwrite_main for the
 * HTM and non-HTM configurations.
 */
37 static struct perf htm_results, non_htm_results;
/* Accessed as tbl_rw_test_param.rounded_tot_insert: upper bound of the key
 * indices examined by the verification loops. The enclosing struct is not
 * visible in this listing.
 */
43 uint32_t rounded_tot_insert;
/* Functional test totals: cycles spent and keys processed by all workers. */
47 static rte_atomic64_t gcycles;
48 static rte_atomic64_t ginsertions;
/* Perf test totals: cycles accumulated by reader and writer threads. */
50 static rte_atomic64_t gread_cycles;
51 static rte_atomic64_t gwrite_cycles;
/* Perf test totals: number of lookups and inserts performed. */
53 static rte_atomic64_t greads;
54 static rte_atomic64_t gwrites;
/*
 * Functional-test worker: processes this lcore's slice of the key array,
 * looking each key up before and after adding it, then adds the elapsed
 * cycles and the number of keys processed to gcycles / ginsertions.
 *
 * arg is unused.
 *
 * NOTE(review): declarations, braces and the statements guarded by the
 * if-conditions are absent from this listing; the comments below describe
 * only the visible statements.
 */
57 test_hash_readwrite_worker(__attribute__((unused)) void *arg)
60 uint32_t lcore_id = rte_lcore_id();
61 uint64_t begin, cycles;
/* Find this lcore's position in slave_core_ids. */
64 for (i = 0; i < rte_lcore_count(); i++) {
65 if (slave_core_ids[i] == lcore_id)
/* First key index of this worker's slice. */
68 offset = tbl_rw_test_param.num_insert * i;
70 printf("Core #%d inserting and reading %d: %'"PRId64" - %'"PRId64"\n",
71 lcore_id, tbl_rw_test_param.num_insert,
72 offset, offset + tbl_rw_test_param.num_insert - 1);
74 begin = rte_rdtsc_precise();
76 for (i = offset; i < offset + tbl_rw_test_param.num_insert; i++) {
/* Check whether the key is already present before adding it.
 * NOTE(review): `> 0` does not treat a return value of 0 as a hit;
 * confirm whether rte_hash_lookup can return 0 for a present key.
 */
78 if (rte_hash_lookup(tbl_rw_test_param.h,
79 tbl_rw_test_param.keys + i) > 0)
82 ret = rte_hash_add_key(tbl_rw_test_param.h,
83 tbl_rw_test_param.keys + i);
/* A lookup right after the add must report the same value the add returned. */
87 if (rte_hash_lookup(tbl_rw_test_param.h,
88 tbl_rw_test_param.keys + i) != ret)
92 cycles = rte_rdtsc_precise() - begin;
93 rte_atomic64_add(&gcycles, cycles);
/* i - offset is the number of keys handled before the loop stopped. */
94 rte_atomic64_add(&ginsertions, i - offset);
/* Flag the rest of the slice so the verification pass ignores it. */
96 for (; i < offset + tbl_rw_test_param.num_insert; i++)
97 tbl_rw_test_param.keys[i] = RTE_RWTEST_FAIL;
/*
 * Create the hash table and the keys/found arrays used by the tests and
 * publish them through tbl_rw_test_param.
 *
 * use_htm:   presumably selects the flag set that includes
 *            RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT (the selecting
 *            condition is not visible in this listing).
 * use_jhash: presumably selects rte_jhash over rte_hash_crc (likewise not
 *            visible).
 *
 * Callers treat a non-zero return as failure.
 */
103 init_params(int use_htm, int use_jhash)
107 uint32_t *keys = NULL;
108 uint32_t *found = NULL;
109 struct rte_hash *handle;
111 struct rte_hash_parameters hash_params = {
112 .entries = TOTAL_ENTRY,
113 .key_len = sizeof(uint32_t),
114 .hash_func_init_val = 0,
115 .socket_id = rte_socket_id(),
/* Two alternative hash functions; only one assignment is expected to run. */
118 hash_params.hash_func = rte_jhash;
120 hash_params.hash_func = rte_hash_crc;
/* Flag set with transactional-memory support requested. */
123 hash_params.extra_flag =
124 RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT |
125 RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY |
126 RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD;
/* Flag set without transactional-memory support. */
128 hash_params.extra_flag =
129 RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY |
130 RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD;
132 hash_params.name = "tests";
134 handle = rte_hash_create(&hash_params);
135 if (handle == NULL) {
/* NOTE(review): this message has no trailing newline, unlike the others. */
136 printf("hash creation failed");
140 tbl_rw_test_param.h = handle;
/* One uint32_t key per table entry. */
141 keys = rte_malloc(NULL, sizeof(uint32_t) * TOTAL_ENTRY, 0);
144 printf("RTE_MALLOC failed\n");
/* Zero-initialised per-key counters, incremented later by the tests. */
148 found = rte_zmalloc(NULL, sizeof(uint32_t) * TOTAL_ENTRY, 0);
150 printf("RTE_ZMALLOC failed\n");
154 tbl_rw_test_param.keys = keys;
155 tbl_rw_test_param.found = found;
/* Loop over every key slot; the loop body is not visible in this listing. */
157 for (i = 0; i < TOTAL_ENTRY; i++)
/* Presumably part of the error path (surrounding lines are not visible). */
164 rte_hash_free(handle);
/*
 * Functional test: runs test_hash_readwrite_worker on the lcores, then walks
 * the hash table to verify that no key was duplicated or lost, and prints
 * the average cycles per insertion-and-lookup.
 *
 * use_htm is forwarded to init_params. The caller treats a negative return
 * as failure.
 *
 * NOTE(review): several declarations (e.g. use_jhash, iter, next_data),
 * braces and guarded statements are absent from this listing.
 */
170 test_hash_readwrite_functional(int use_htm)
173 const void *next_key;
177 uint32_t duplicated_keys = 0;
178 uint32_t lost_keys = 0;
/* All lcores except one take part as workers. */
180 int slave_cnt = rte_lcore_count() - 1;
182 rte_atomic64_init(&gcycles);
183 rte_atomic64_clear(&gcycles);
185 rte_atomic64_init(&ginsertions);
186 rte_atomic64_clear(&ginsertions);
188 if (init_params(use_htm, use_jhash) != 0)
/* Keys per worker (integer division). */
191 tbl_rw_test_param.num_insert =
192 TOTAL_INSERT / slave_cnt;
/* The rest of this expression is not visible in this listing. */
194 tbl_rw_test_param.rounded_tot_insert =
195 tbl_rw_test_param.num_insert
198 printf("++++++++Start function tests:+++++++++\n");
200 /* Fire all threads. */
201 rte_eal_mp_remote_launch(test_hash_readwrite_worker,
203 rte_eal_mp_wait_lcore();
/* Count how many times each key value appears in the table. */
205 while (rte_hash_iterate(tbl_rw_test_param.h, &next_key,
206 &next_data, &iter) >= 0) {
207 /* Search for the key in the list of keys added. */
208 i = *(const uint32_t *)next_key;
209 tbl_rw_test_param.found[i]++;
/* Every key not flagged RTE_RWTEST_FAIL must appear exactly once. */
212 for (i = 0; i < tbl_rw_test_param.rounded_tot_insert; i++) {
213 if (tbl_rw_test_param.keys[i] != RTE_RWTEST_FAIL) {
214 if (tbl_rw_test_param.found[i] > 1) {
218 if (tbl_rw_test_param.found[i] == 0) {
220 printf("key %d is lost\n", i);
226 if (duplicated_keys > 0) {
227 printf("%d key duplicated\n", duplicated_keys);
232 printf("%d key lost\n", lost_keys);
236 printf("No key corrupted during read-write test.\n");
/* NOTE(review): divides by ginsertions; confirm it cannot be zero here. */
238 unsigned long long int cycles_per_insertion =
239 rte_atomic64_read(&gcycles) /
240 rte_atomic64_read(&ginsertions);
242 printf("cycles per insertion and lookup: %llu\n", cycles_per_insertion);
/* Release the test resources. */
244 rte_free(tbl_rw_test_param.found);
245 rte_free(tbl_rw_test_param.keys);
246 rte_hash_free(tbl_rw_test_param.h);
247 printf("+++++++++Complete function tests+++++++++\n");
/* Second release sequence; presumably the error path (label not visible). */
251 rte_free(tbl_rw_test_param.found);
252 rte_free(tbl_rw_test_param.keys);
253 rte_hash_free(tbl_rw_test_param.h);
/*
 * Perf-test reader: looks up keys[0 .. read_cnt) and checks that the data
 * stored with each key equals its index, then adds the elapsed cycles and
 * the number of lookups to gread_cycles / greads.
 *
 * arg carries the number of lookups to perform, cast to a pointer.
 *
 * NOTE(review): some lines (declarations, braces, the data argument of the
 * lookup call) are absent from this listing.
 */
259 test_rw_reader(void *arg)
262 uint64_t begin, cycles;
263 uint64_t read_cnt = (uint64_t)((uintptr_t)arg);
265 begin = rte_rdtsc_precise();
266 for (i = 0; i < read_cnt; i++) {
268 rte_hash_lookup_data(tbl_rw_test_param.h,
269 tbl_rw_test_param.keys + i,
/* The value stored with key i is expected to be i itself. */
271 if (i != (uint64_t)(uintptr_t)data) {
272 printf("lookup find wrong value %"PRIu64","
274 (uint64_t)(uintptr_t)data);
279 cycles = rte_rdtsc_precise() - begin;
280 rte_atomic64_add(&gread_cycles, cycles);
/* i is the loop counter value when the loop ended. */
281 rte_atomic64_add(&greads, i);
/*
 * Perf-test writer: inserts this thread's slice of the second half of the
 * key array, storing each key's index as its data, then adds the elapsed
 * cycles and num_insert to gwrite_cycles / gwrites.
 *
 * arg carries the slave_core_ids index of the first writer thread, cast to
 * a pointer.
 *
 * NOTE(review): some lines (declarations, braces, guarded statements) are
 * absent from this listing.
 */
286 test_rw_writer(void *arg)
289 uint32_t lcore_id = rte_lcore_id();
290 uint64_t begin, cycles;
292 uint64_t start_coreid = (uint64_t)(uintptr_t)arg;
/* Find this lcore's position in slave_core_ids. */
295 for (i = 0; i < rte_lcore_count(); i++) {
296 if (slave_core_ids[i] == lcore_id)
/* Writers start after the first TOTAL_INSERT / 2 keys; each writer's slice
 * is selected by its position relative to the first writer.
 */
300 offset = TOTAL_INSERT / 2 + (i - (start_coreid)) *
301 tbl_rw_test_param.num_insert;
302 begin = rte_rdtsc_precise();
303 for (i = offset; i < offset + tbl_rw_test_param.num_insert; i++) {
304 ret = rte_hash_add_key_data(tbl_rw_test_param.h,
305 tbl_rw_test_param.keys + i,
306 (void *)((uintptr_t)i));
308 printf("writer failed %"PRIu64"\n", i);
313 cycles = rte_rdtsc_precise() - begin;
314 rte_atomic64_add(&gwrite_cycles, cycles);
/* Adds the full slice size, not the loop counter. */
315 rte_atomic64_add(&gwrites, tbl_rw_test_param.num_insert);
/*
 * Performance test. Measures single-thread insert and lookup cost first,
 * then, for each core_cnt[] entry, reader-only, writer-only and concurrent
 * reader/writer runs, storing cycles-per-operation figures in *perf_results.
 * After the concurrent run the table is checked for duplicated or lost keys.
 *
 * perf_results: destination for the measured figures.
 * use_htm:      forwarded to init_params.
 * The remaining parameter(s) are on a line absent from this listing.
 *
 * NOTE(review): many declarations, braces, conditions and launch arguments
 * are absent from this listing; comments describe visible statements only.
 */
320 test_hash_readwrite_perf(struct perf *perf_results, int use_htm,
326 uint64_t i, read_cnt;
328 const void *next_key;
333 uint32_t duplicated_keys = 0;
334 uint32_t lost_keys = 0;
336 uint64_t start = 0, end = 0;
/* Reset all perf counters. */
338 rte_atomic64_init(&greads);
339 rte_atomic64_init(&gwrites);
340 rte_atomic64_clear(&gwrites);
341 rte_atomic64_clear(&greads);
343 rte_atomic64_init(&gread_cycles);
344 rte_atomic64_clear(&gread_cycles);
345 rte_atomic64_init(&gwrite_cycles);
346 rte_atomic64_clear(&gwrite_cycles);
348 if (init_params(use_htm, use_jhash) != 0)
/* NOTE(review): the next four lines read as the body of a block comment
 * whose delimiters are not present in this listing.
 */
352 * Do a readers finish faster or writers finish faster test.
353 * When readers finish faster, we timing the readers, and when writers
354 * finish faster, we timing the writers.
355 * Divided by 10 or 2 is just experimental values to vary the workload
/* Two alternative workloads; the selecting condition is not visible. */
359 printf("++++++Start perf test: reader++++++++\n");
360 read_cnt = TOTAL_INSERT / 10;
362 printf("++++++Start perf test: writer++++++++\n");
363 read_cnt = TOTAL_INSERT / 2;
366 /* We first test single thread performance */
367 start = rte_rdtsc_precise();
368 /* Insert half of the keys */
369 for (i = 0; i < TOTAL_INSERT / 2; i++) {
370 ret = rte_hash_add_key_data(tbl_rw_test_param.h,
371 tbl_rw_test_param.keys + i,
372 (void *)((uintptr_t)i));
374 printf("Failed to insert half of keys\n");
/* Average cycles per single-thread insert. */
378 end = rte_rdtsc_precise() - start;
379 perf_results->single_write = end / i;
381 start = rte_rdtsc_precise();
/* Single-thread lookups of the first read_cnt keys, validating the data. */
383 for (i = 0; i < read_cnt; i++) {
385 rte_hash_lookup_data(tbl_rw_test_param.h,
386 tbl_rw_test_param.keys + i,
388 if (i != (uint64_t)(uintptr_t)data) {
389 printf("lookup find wrong value"
390 " %"PRIu64",%"PRIu64"\n", i,
391 (uint64_t)(uintptr_t)data);
/* Average cycles per single-thread lookup. */
395 end = rte_rdtsc_precise() - start;
396 perf_results->single_read = end / i;
/* One iteration per thread-count configuration. */
398 for (n = 0; n < NUM_TEST; n++) {
399 unsigned int tot_slave_lcore = rte_lcore_count() - 1;
/* A configuration needs core_cnt[n] readers plus core_cnt[n] writers;
 * the statement guarded by this check is not visible.
 */
400 if (tot_slave_lcore < core_cnt[n] * 2)
403 rte_atomic64_clear(&greads);
404 rte_atomic64_clear(&gread_cycles);
405 rte_atomic64_clear(&gwrites);
406 rte_atomic64_clear(&gwrite_cycles);
408 rte_hash_reset(tbl_rw_test_param.h);
/* Each writer gets an equal share of the second half of the keys. */
410 tbl_rw_test_param.num_insert = TOTAL_INSERT / 2 / core_cnt[n];
411 tbl_rw_test_param.rounded_tot_insert = TOTAL_INSERT / 2 +
412 tbl_rw_test_param.num_insert *
/* Re-populate the first half of the keys after the reset. */
415 for (i = 0; i < TOTAL_INSERT / 2; i++) {
416 ret = rte_hash_add_key_data(tbl_rw_test_param.h,
417 tbl_rw_test_param.keys + i,
418 (void *)((uintptr_t)i));
420 printf("Failed to insert half of keys\n");
425 /* Then test multiple thread case but only all reads or
429 /* Test only reader cases */
430 for (i = 0; i < core_cnt[n]; i++)
431 rte_eal_remote_launch(test_rw_reader,
432 (void *)(uintptr_t)read_cnt,
435 rte_eal_mp_wait_lcore();
438 /* Test only writer cases */
439 for (; i < core_cnt[n] * 2; i++)
440 rte_eal_remote_launch(test_rw_writer,
441 (void *)((uintptr_t)start_coreid),
444 rte_eal_mp_wait_lcore();
/* Reader-only result: total read cycles divided by total lookups. */
447 unsigned long long int cycles_per_insertion =
448 rte_atomic64_read(&gread_cycles) /
449 rte_atomic64_read(&greads);
450 perf_results->read_only[n] = cycles_per_insertion;
451 printf("Reader only: cycles per lookup: %llu\n",
452 cycles_per_insertion);
/* Writer-only result: total write cycles divided by total inserts. */
456 unsigned long long int cycles_per_insertion =
457 rte_atomic64_read(&gwrite_cycles) /
458 rte_atomic64_read(&gwrites);
459 perf_results->write_only[n] = cycles_per_insertion;
460 printf("Writer only: cycles per writes: %llu\n",
461 cycles_per_insertion);
/* Reset counters and table before the concurrent reader/writer run. */
464 rte_atomic64_clear(&greads);
465 rte_atomic64_clear(&gread_cycles);
466 rte_atomic64_clear(&gwrites);
467 rte_atomic64_clear(&gwrite_cycles);
469 rte_hash_reset(tbl_rw_test_param.h);
471 for (i = 0; i < TOTAL_INSERT / 2; i++) {
472 ret = rte_hash_add_key_data(tbl_rw_test_param.h,
473 tbl_rw_test_param.keys + i,
474 (void *)((uintptr_t)i));
476 printf("Failed to insert half of keys\n");
/* Writers occupy slave_core_ids positions core_cnt[n] .. 2*core_cnt[n]-1. */
481 start_coreid = core_cnt[n];
/* Launch order A: writers first, then readers. */
484 for (i = core_cnt[n]; i < core_cnt[n] * 2; i++)
485 rte_eal_remote_launch(test_rw_writer,
486 (void *)((uintptr_t)start_coreid),
488 for (i = 0; i < core_cnt[n]; i++)
489 rte_eal_remote_launch(test_rw_reader,
490 (void *)(uintptr_t)read_cnt,
/* Launch order B: readers first, then writers. The condition choosing
 * between the two orders is not visible in this listing.
 */
493 for (i = 0; i < core_cnt[n]; i++)
494 rte_eal_remote_launch(test_rw_reader,
495 (void *)(uintptr_t)read_cnt,
497 for (; i < core_cnt[n] * 2; i++)
498 rte_eal_remote_launch(test_rw_writer,
499 (void *)((uintptr_t)start_coreid),
503 rte_eal_mp_wait_lcore();
/* Count how many times each key value appears in the table. */
505 while (rte_hash_iterate(tbl_rw_test_param.h,
506 &next_key, &next_data, &iter) >= 0) {
507 /* Search for the key in the list of keys added. */
508 i = *(const uint32_t *)next_key;
509 tbl_rw_test_param.found[i]++;
/* Every key not flagged RTE_RWTEST_FAIL must appear exactly once. */
512 for (i = 0; i < tbl_rw_test_param.rounded_tot_insert; i++) {
513 if (tbl_rw_test_param.keys[i] != RTE_RWTEST_FAIL) {
514 if (tbl_rw_test_param.found[i] > 1) {
518 if (tbl_rw_test_param.found[i] == 0) {
520 printf("key %"PRIu64" is lost\n", i);
526 if (duplicated_keys > 0) {
527 printf("%d key duplicated\n", duplicated_keys);
532 printf("%d key lost\n", lost_keys);
536 printf("No key corrupted during read-write test.\n");
/* Concurrent-run reader result. */
539 unsigned long long int cycles_per_insertion =
540 rte_atomic64_read(&gread_cycles) /
541 rte_atomic64_read(&greads);
542 perf_results->read_write_r[n] = cycles_per_insertion;
543 printf("Read-write cycles per lookup: %llu\n",
544 cycles_per_insertion);
/* Concurrent-run writer result. */
548 unsigned long long int cycles_per_insertion =
549 rte_atomic64_read(&gwrite_cycles) /
550 rte_atomic64_read(&gwrites);
551 perf_results->read_write_w[n] = cycles_per_insertion;
552 printf("Read-write cycles per writes: %llu\n",
553 cycles_per_insertion);
/* Release the test resources. */
558 rte_free(tbl_rw_test_param.found);
559 rte_free(tbl_rw_test_param.keys);
560 rte_hash_free(tbl_rw_test_param.h);
/* Second release sequence; presumably the error path (label not visible). */
564 rte_free(tbl_rw_test_param.found);
565 rte_free(tbl_rw_test_param.keys);
566 rte_hash_free(tbl_rw_test_param.h);
/*
 * Test entry point: records the worker lcore IDs, runs the functional and
 * perf tests with and without hardware transactional memory as applicable,
 * and prints a summary of the collected perf figures.
 *
 * Requires more than two lcores; otherwise only a message is printed (the
 * statement that follows the message is not visible in this listing).
 *
 * NOTE(review): braces, assignments to use_htm / reader_faster, return
 * statements and some printf calls are absent from this listing.
 */
573 test_hash_readwrite_main(void)
/* NOTE(review): the next five lines read as the body of a block comment
 * whose delimiters are not present in this listing.
 */
576 * Variables used to choose different tests.
577 * use_htm indicates if hardware transactional memory should be used.
578 * reader_faster indicates if the reader threads should finish earlier
579 * than writer threads. This is to timing either reader threads or
580 * writer threads for performance numbers.
582 int use_htm, reader_faster;
583 unsigned int i = 0, core_id = 0;
585 if (rte_lcore_count() <= 2) {
586 printf("More than two lcores are required "
587 "to do read write test\n");
/* Record every worker lcore ID for the worker functions to index into. */
591 RTE_LCORE_FOREACH_SLAVE(core_id) {
592 slave_core_ids[i] = core_id;
/* Adopt the environment's numeric locale; presumably for the %' grouping
 * flag used in the functional worker's printf.
 */
596 setlocale(LC_NUMERIC, "");
/* HTM runs are attempted only when the platform reports support. */
598 if (rte_tm_supported()) {
599 printf("Hardware transactional memory (lock elision) "
602 printf("Test read-write with Hardware transactional memory\n");
605 if (test_hash_readwrite_functional(use_htm) < 0)
/* Two perf runs, both storing into htm_results. */
609 if (test_hash_readwrite_perf(&htm_results, use_htm,
614 if (test_hash_readwrite_perf(&htm_results, use_htm,
618 printf("Hardware transactional memory (lock elision) "
619 "is NOT supported\n");
622 printf("Test read-write without Hardware transactional memory\n");
624 if (test_hash_readwrite_functional(use_htm) < 0)
/* Two perf runs, both storing into non_htm_results. */
627 if (test_hash_readwrite_perf(&non_htm_results, use_htm,
631 if (test_hash_readwrite_perf(&non_htm_results, use_htm,
635 printf("Results summary:\n");
/* NOTE(review): only htm_results' single-thread figures are printed in the
 * visible lines; confirm how this behaves when HTM is not supported.
 */
637 printf("single read: %u\n", htm_results.single_read);
638 printf("single write: %u\n", htm_results.single_write);
/* Per thread-count configuration: HTM figures, then non-HTM figures. */
639 for (i = 0; i < NUM_TEST; i++) {
640 printf("core_cnt: %u\n", core_cnt[i]);
642 printf("read only: %u\n", htm_results.read_only[i]);
643 printf("write only: %u\n", htm_results.write_only[i]);
644 printf("read-write read: %u\n", htm_results.read_write_r[i]);
645 printf("read-write write: %u\n", htm_results.read_write_w[i]);
647 printf("non HTM:\n");
648 printf("read only: %u\n", non_htm_results.read_only[i]);
649 printf("write only: %u\n", non_htm_results.write_only[i]);
650 printf("read-write read: %u\n",
651 non_htm_results.read_write_r[i]);
652 printf("read-write write: %u\n",
653 non_htm_results.read_write_w[i]);
/* Register test_hash_readwrite_main under the command name
 * hash_readwrite_autotest (the macro is defined outside this view).
 */
659 REGISTER_TEST_COMMAND(hash_readwrite_autotest, test_hash_readwrite_main);