hash: separate multi-writer from r/w concurrency
[dpdk.git] / test / test / test_hash_readwrite.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2018 Intel Corporation
3  */
4
5 #include <inttypes.h>
6 #include <locale.h>
7
8 #include <rte_cycles.h>
9 #include <rte_hash.h>
10 #include <rte_hash_crc.h>
11 #include <rte_jhash.h>
12 #include <rte_launch.h>
13 #include <rte_malloc.h>
14 #include <rte_random.h>
15 #include <rte_spinlock.h>
16
17 #include "test.h"
18
19 #define RTE_RWTEST_FAIL 0
20
21 #define TOTAL_ENTRY (16*1024*1024)
22 #define TOTAL_INSERT (15*1024*1024)
23
24 #define NUM_TEST 3
25 unsigned int core_cnt[NUM_TEST] = {2, 4, 8};
26
27 unsigned int slave_core_ids[RTE_MAX_LCORE];
28 struct perf {
29         uint32_t single_read;
30         uint32_t single_write;
31         uint32_t read_only[NUM_TEST];
32         uint32_t write_only[NUM_TEST];
33         uint32_t read_write_r[NUM_TEST];
34         uint32_t read_write_w[NUM_TEST];
35 };
36
37 static struct perf htm_results, non_htm_results;
38
39 struct {
40         uint32_t *keys;
41         uint32_t *found;
42         uint32_t num_insert;
43         uint32_t rounded_tot_insert;
44         struct rte_hash *h;
45 } tbl_rw_test_param;
46
47 static rte_atomic64_t gcycles;
48 static rte_atomic64_t ginsertions;
49
50 static rte_atomic64_t gread_cycles;
51 static rte_atomic64_t gwrite_cycles;
52
53 static rte_atomic64_t greads;
54 static rte_atomic64_t gwrites;
55
56 static int
57 test_hash_readwrite_worker(__attribute__((unused)) void *arg)
58 {
59         uint64_t i, offset;
60         uint32_t lcore_id = rte_lcore_id();
61         uint64_t begin, cycles;
62         int ret;
63
64         for (i = 0; i < rte_lcore_count(); i++) {
65                 if (slave_core_ids[i] == lcore_id)
66                         break;
67         }
68         offset = tbl_rw_test_param.num_insert * i;
69
70         printf("Core #%d inserting and reading %d: %'"PRId64" - %'"PRId64"\n",
71                lcore_id, tbl_rw_test_param.num_insert,
72                offset, offset + tbl_rw_test_param.num_insert - 1);
73
74         begin = rte_rdtsc_precise();
75
76         for (i = offset; i < offset + tbl_rw_test_param.num_insert; i++) {
77
78                 if (rte_hash_lookup(tbl_rw_test_param.h,
79                                 tbl_rw_test_param.keys + i) > 0)
80                         break;
81
82                 ret = rte_hash_add_key(tbl_rw_test_param.h,
83                                      tbl_rw_test_param.keys + i);
84                 if (ret < 0)
85                         break;
86
87                 if (rte_hash_lookup(tbl_rw_test_param.h,
88                                 tbl_rw_test_param.keys + i) != ret)
89                         break;
90         }
91
92         cycles = rte_rdtsc_precise() - begin;
93         rte_atomic64_add(&gcycles, cycles);
94         rte_atomic64_add(&ginsertions, i - offset);
95
96         for (; i < offset + tbl_rw_test_param.num_insert; i++)
97                 tbl_rw_test_param.keys[i] = RTE_RWTEST_FAIL;
98
99         return 0;
100 }
101
102 static int
103 init_params(int use_htm, int use_jhash)
104 {
105         unsigned int i;
106
107         uint32_t *keys = NULL;
108         uint32_t *found = NULL;
109         struct rte_hash *handle;
110
111         struct rte_hash_parameters hash_params = {
112                 .entries = TOTAL_ENTRY,
113                 .key_len = sizeof(uint32_t),
114                 .hash_func_init_val = 0,
115                 .socket_id = rte_socket_id(),
116         };
117         if (use_jhash)
118                 hash_params.hash_func = rte_jhash;
119         else
120                 hash_params.hash_func = rte_hash_crc;
121
122         if (use_htm)
123                 hash_params.extra_flag =
124                         RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT |
125                         RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY |
126                         RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD;
127         else
128                 hash_params.extra_flag =
129                         RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY |
130                         RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD;
131
132         hash_params.name = "tests";
133
134         handle = rte_hash_create(&hash_params);
135         if (handle == NULL) {
136                 printf("hash creation failed");
137                 return -1;
138         }
139
140         tbl_rw_test_param.h = handle;
141         keys = rte_malloc(NULL, sizeof(uint32_t) * TOTAL_ENTRY, 0);
142
143         if (keys == NULL) {
144                 printf("RTE_MALLOC failed\n");
145                 goto err;
146         }
147
148         found = rte_zmalloc(NULL, sizeof(uint32_t) * TOTAL_ENTRY, 0);
149         if (found == NULL) {
150                 printf("RTE_ZMALLOC failed\n");
151                 goto err;
152         }
153
154         tbl_rw_test_param.keys = keys;
155         tbl_rw_test_param.found = found;
156
157         for (i = 0; i < TOTAL_ENTRY; i++)
158                 keys[i] = i;
159
160         return 0;
161
162 err:
163         rte_free(keys);
164         rte_hash_free(handle);
165
166         return -1;
167 }
168
169 static int
170 test_hash_readwrite_functional(int use_htm)
171 {
172         unsigned int i;
173         const void *next_key;
174         void *next_data;
175         uint32_t iter = 0;
176
177         uint32_t duplicated_keys = 0;
178         uint32_t lost_keys = 0;
179         int use_jhash = 1;
180         int slave_cnt = rte_lcore_count() - 1;
181
182         rte_atomic64_init(&gcycles);
183         rte_atomic64_clear(&gcycles);
184
185         rte_atomic64_init(&ginsertions);
186         rte_atomic64_clear(&ginsertions);
187
188         if (init_params(use_htm, use_jhash) != 0)
189                 goto err;
190
191         tbl_rw_test_param.num_insert =
192                 TOTAL_INSERT / slave_cnt;
193
194         tbl_rw_test_param.rounded_tot_insert =
195                 tbl_rw_test_param.num_insert
196                 * slave_cnt;
197
198         printf("++++++++Start function tests:+++++++++\n");
199
200         /* Fire all threads. */
201         rte_eal_mp_remote_launch(test_hash_readwrite_worker,
202                                  NULL, SKIP_MASTER);
203         rte_eal_mp_wait_lcore();
204
205         while (rte_hash_iterate(tbl_rw_test_param.h, &next_key,
206                         &next_data, &iter) >= 0) {
207                 /* Search for the key in the list of keys added .*/
208                 i = *(const uint32_t *)next_key;
209                 tbl_rw_test_param.found[i]++;
210         }
211
212         for (i = 0; i < tbl_rw_test_param.rounded_tot_insert; i++) {
213                 if (tbl_rw_test_param.keys[i] != RTE_RWTEST_FAIL) {
214                         if (tbl_rw_test_param.found[i] > 1) {
215                                 duplicated_keys++;
216                                 break;
217                         }
218                         if (tbl_rw_test_param.found[i] == 0) {
219                                 lost_keys++;
220                                 printf("key %d is lost\n", i);
221                                 break;
222                         }
223                 }
224         }
225
226         if (duplicated_keys > 0) {
227                 printf("%d key duplicated\n", duplicated_keys);
228                 goto err_free;
229         }
230
231         if (lost_keys > 0) {
232                 printf("%d key lost\n", lost_keys);
233                 goto err_free;
234         }
235
236         printf("No key corrupted during read-write test.\n");
237
238         unsigned long long int cycles_per_insertion =
239                 rte_atomic64_read(&gcycles) /
240                 rte_atomic64_read(&ginsertions);
241
242         printf("cycles per insertion and lookup: %llu\n", cycles_per_insertion);
243
244         rte_free(tbl_rw_test_param.found);
245         rte_free(tbl_rw_test_param.keys);
246         rte_hash_free(tbl_rw_test_param.h);
247         printf("+++++++++Complete function tests+++++++++\n");
248         return 0;
249
250 err_free:
251         rte_free(tbl_rw_test_param.found);
252         rte_free(tbl_rw_test_param.keys);
253         rte_hash_free(tbl_rw_test_param.h);
254 err:
255         return -1;
256 }
257
258 static int
259 test_rw_reader(void *arg)
260 {
261         uint64_t i;
262         uint64_t begin, cycles;
263         uint64_t read_cnt = (uint64_t)((uintptr_t)arg);
264
265         begin = rte_rdtsc_precise();
266         for (i = 0; i < read_cnt; i++) {
267                 void *data;
268                 rte_hash_lookup_data(tbl_rw_test_param.h,
269                                 tbl_rw_test_param.keys + i,
270                                 &data);
271                 if (i != (uint64_t)(uintptr_t)data) {
272                         printf("lookup find wrong value %"PRIu64","
273                                 "%"PRIu64"\n", i,
274                                 (uint64_t)(uintptr_t)data);
275                         break;
276                 }
277         }
278
279         cycles = rte_rdtsc_precise() - begin;
280         rte_atomic64_add(&gread_cycles, cycles);
281         rte_atomic64_add(&greads, i);
282         return 0;
283 }
284
285 static int
286 test_rw_writer(void *arg)
287 {
288         uint64_t i;
289         uint32_t lcore_id = rte_lcore_id();
290         uint64_t begin, cycles;
291         int ret;
292         uint64_t start_coreid = (uint64_t)(uintptr_t)arg;
293         uint64_t offset;
294
295         for (i = 0; i < rte_lcore_count(); i++) {
296                 if (slave_core_ids[i] == lcore_id)
297                         break;
298         }
299
300         offset = TOTAL_INSERT / 2 + (i - (start_coreid)) *
301                                 tbl_rw_test_param.num_insert;
302         begin = rte_rdtsc_precise();
303         for (i = offset; i < offset + tbl_rw_test_param.num_insert; i++) {
304                 ret = rte_hash_add_key_data(tbl_rw_test_param.h,
305                                 tbl_rw_test_param.keys + i,
306                                 (void *)((uintptr_t)i));
307                 if (ret < 0) {
308                         printf("writer failed %"PRIu64"\n", i);
309                         break;
310                 }
311         }
312
313         cycles = rte_rdtsc_precise() - begin;
314         rte_atomic64_add(&gwrite_cycles, cycles);
315         rte_atomic64_add(&gwrites, tbl_rw_test_param.num_insert);
316         return 0;
317 }
318
319 static int
320 test_hash_readwrite_perf(struct perf *perf_results, int use_htm,
321                                                         int reader_faster)
322 {
323         unsigned int n;
324         int ret;
325         int start_coreid;
326         uint64_t i, read_cnt;
327
328         const void *next_key;
329         void *next_data;
330         uint32_t iter = 0;
331         int use_jhash = 0;
332
333         uint32_t duplicated_keys = 0;
334         uint32_t lost_keys = 0;
335
336         uint64_t start = 0, end = 0;
337
338         rte_atomic64_init(&greads);
339         rte_atomic64_init(&gwrites);
340         rte_atomic64_clear(&gwrites);
341         rte_atomic64_clear(&greads);
342
343         rte_atomic64_init(&gread_cycles);
344         rte_atomic64_clear(&gread_cycles);
345         rte_atomic64_init(&gwrite_cycles);
346         rte_atomic64_clear(&gwrite_cycles);
347
348         if (init_params(use_htm, use_jhash) != 0)
349                 goto err;
350
351         /*
352          * Do a readers finish faster or writers finish faster test.
353          * When readers finish faster, we timing the readers, and when writers
354          * finish faster, we timing the writers.
355          * Divided by 10 or 2 is just experimental values to vary the workload
356          * of readers.
357          */
358         if (reader_faster) {
359                 printf("++++++Start perf test: reader++++++++\n");
360                 read_cnt = TOTAL_INSERT / 10;
361         } else {
362                 printf("++++++Start perf test: writer++++++++\n");
363                 read_cnt = TOTAL_INSERT / 2;
364         }
365
366         /* We first test single thread performance */
367         start = rte_rdtsc_precise();
368         /* Insert half of the keys */
369         for (i = 0; i < TOTAL_INSERT / 2; i++) {
370                 ret = rte_hash_add_key_data(tbl_rw_test_param.h,
371                                      tbl_rw_test_param.keys + i,
372                                         (void *)((uintptr_t)i));
373                 if (ret < 0) {
374                         printf("Failed to insert half of keys\n");
375                         goto err_free;
376                 }
377         }
378         end = rte_rdtsc_precise() - start;
379         perf_results->single_write = end / i;
380
381         start = rte_rdtsc_precise();
382
383         for (i = 0; i < read_cnt; i++) {
384                 void *data;
385                 rte_hash_lookup_data(tbl_rw_test_param.h,
386                                 tbl_rw_test_param.keys + i,
387                                 &data);
388                 if (i != (uint64_t)(uintptr_t)data) {
389                         printf("lookup find wrong value"
390                                         " %"PRIu64",%"PRIu64"\n", i,
391                                         (uint64_t)(uintptr_t)data);
392                         break;
393                 }
394         }
395         end = rte_rdtsc_precise() - start;
396         perf_results->single_read = end / i;
397
398         for (n = 0; n < NUM_TEST; n++) {
399                 unsigned int tot_slave_lcore = rte_lcore_count() - 1;
400                 if (tot_slave_lcore < core_cnt[n] * 2)
401                         goto finish;
402
403                 rte_atomic64_clear(&greads);
404                 rte_atomic64_clear(&gread_cycles);
405                 rte_atomic64_clear(&gwrites);
406                 rte_atomic64_clear(&gwrite_cycles);
407
408                 rte_hash_reset(tbl_rw_test_param.h);
409
410                 tbl_rw_test_param.num_insert = TOTAL_INSERT / 2 / core_cnt[n];
411                 tbl_rw_test_param.rounded_tot_insert = TOTAL_INSERT / 2 +
412                                                 tbl_rw_test_param.num_insert *
413                                                 core_cnt[n];
414
415                 for (i = 0; i < TOTAL_INSERT / 2; i++) {
416                         ret = rte_hash_add_key_data(tbl_rw_test_param.h,
417                                         tbl_rw_test_param.keys + i,
418                                         (void *)((uintptr_t)i));
419                         if (ret < 0) {
420                                 printf("Failed to insert half of keys\n");
421                                 goto err_free;
422                         }
423                 }
424
425                 /* Then test multiple thread case but only all reads or
426                  * all writes
427                  */
428
429                 /* Test only reader cases */
430                 for (i = 0; i < core_cnt[n]; i++)
431                         rte_eal_remote_launch(test_rw_reader,
432                                         (void *)(uintptr_t)read_cnt,
433                                         slave_core_ids[i]);
434
435                 rte_eal_mp_wait_lcore();
436
437                 start_coreid = i;
438                 /* Test only writer cases */
439                 for (; i < core_cnt[n] * 2; i++)
440                         rte_eal_remote_launch(test_rw_writer,
441                                         (void *)((uintptr_t)start_coreid),
442                                         slave_core_ids[i]);
443
444                 rte_eal_mp_wait_lcore();
445
446                 if (reader_faster) {
447                         unsigned long long int cycles_per_insertion =
448                                 rte_atomic64_read(&gread_cycles) /
449                                 rte_atomic64_read(&greads);
450                         perf_results->read_only[n] = cycles_per_insertion;
451                         printf("Reader only: cycles per lookup: %llu\n",
452                                                         cycles_per_insertion);
453                 }
454
455                 else {
456                         unsigned long long int cycles_per_insertion =
457                                 rte_atomic64_read(&gwrite_cycles) /
458                                 rte_atomic64_read(&gwrites);
459                         perf_results->write_only[n] = cycles_per_insertion;
460                         printf("Writer only: cycles per writes: %llu\n",
461                                                         cycles_per_insertion);
462                 }
463
464                 rte_atomic64_clear(&greads);
465                 rte_atomic64_clear(&gread_cycles);
466                 rte_atomic64_clear(&gwrites);
467                 rte_atomic64_clear(&gwrite_cycles);
468
469                 rte_hash_reset(tbl_rw_test_param.h);
470
471                 for (i = 0; i < TOTAL_INSERT / 2; i++) {
472                         ret = rte_hash_add_key_data(tbl_rw_test_param.h,
473                                         tbl_rw_test_param.keys + i,
474                                         (void *)((uintptr_t)i));
475                         if (ret < 0) {
476                                 printf("Failed to insert half of keys\n");
477                                 goto err_free;
478                         }
479                 }
480
481                 start_coreid = core_cnt[n];
482
483                 if (reader_faster) {
484                         for (i = core_cnt[n]; i < core_cnt[n] * 2; i++)
485                                 rte_eal_remote_launch(test_rw_writer,
486                                         (void *)((uintptr_t)start_coreid),
487                                         slave_core_ids[i]);
488                         for (i = 0; i < core_cnt[n]; i++)
489                                 rte_eal_remote_launch(test_rw_reader,
490                                         (void *)(uintptr_t)read_cnt,
491                                         slave_core_ids[i]);
492                 } else {
493                         for (i = 0; i < core_cnt[n]; i++)
494                                 rte_eal_remote_launch(test_rw_reader,
495                                         (void *)(uintptr_t)read_cnt,
496                                         slave_core_ids[i]);
497                         for (; i < core_cnt[n] * 2; i++)
498                                 rte_eal_remote_launch(test_rw_writer,
499                                         (void *)((uintptr_t)start_coreid),
500                                         slave_core_ids[i]);
501                 }
502
503                 rte_eal_mp_wait_lcore();
504
505                 while (rte_hash_iterate(tbl_rw_test_param.h,
506                                 &next_key, &next_data, &iter) >= 0) {
507                         /* Search for the key in the list of keys added .*/
508                         i = *(const uint32_t *)next_key;
509                         tbl_rw_test_param.found[i]++;
510                 }
511
512                 for (i = 0; i < tbl_rw_test_param.rounded_tot_insert; i++) {
513                         if (tbl_rw_test_param.keys[i] != RTE_RWTEST_FAIL) {
514                                 if (tbl_rw_test_param.found[i] > 1) {
515                                         duplicated_keys++;
516                                         break;
517                                 }
518                                 if (tbl_rw_test_param.found[i] == 0) {
519                                         lost_keys++;
520                                         printf("key %"PRIu64" is lost\n", i);
521                                         break;
522                                 }
523                         }
524                 }
525
526                 if (duplicated_keys > 0) {
527                         printf("%d key duplicated\n", duplicated_keys);
528                         goto err_free;
529                 }
530
531                 if (lost_keys > 0) {
532                         printf("%d key lost\n", lost_keys);
533                         goto err_free;
534                 }
535
536                 printf("No key corrupted during read-write test.\n");
537
538                 if (reader_faster) {
539                         unsigned long long int cycles_per_insertion =
540                                 rte_atomic64_read(&gread_cycles) /
541                                 rte_atomic64_read(&greads);
542                         perf_results->read_write_r[n] = cycles_per_insertion;
543                         printf("Read-write cycles per lookup: %llu\n",
544                                                         cycles_per_insertion);
545                 }
546
547                 else {
548                         unsigned long long int cycles_per_insertion =
549                                 rte_atomic64_read(&gwrite_cycles) /
550                                 rte_atomic64_read(&gwrites);
551                         perf_results->read_write_w[n] = cycles_per_insertion;
552                         printf("Read-write cycles per writes: %llu\n",
553                                                         cycles_per_insertion);
554                 }
555         }
556
557 finish:
558         rte_free(tbl_rw_test_param.found);
559         rte_free(tbl_rw_test_param.keys);
560         rte_hash_free(tbl_rw_test_param.h);
561         return 0;
562
563 err_free:
564         rte_free(tbl_rw_test_param.found);
565         rte_free(tbl_rw_test_param.keys);
566         rte_hash_free(tbl_rw_test_param.h);
567
568 err:
569         return -1;
570 }
571
572 static int
573 test_hash_readwrite_main(void)
574 {
575         /*
576          * Variables used to choose different tests.
577          * use_htm indicates if hardware transactional memory should be used.
578          * reader_faster indicates if the reader threads should finish earlier
579          * than writer threads. This is to timing either reader threads or
580          * writer threads for performance numbers.
581          */
582         int use_htm, reader_faster;
583         unsigned int i = 0, core_id = 0;
584
585         if (rte_lcore_count() <= 2) {
586                 printf("More than two lcores are required "
587                         "to do read write test\n");
588                 return 0;
589         }
590
591         RTE_LCORE_FOREACH_SLAVE(core_id) {
592                 slave_core_ids[i] = core_id;
593                 i++;
594         }
595
596         setlocale(LC_NUMERIC, "");
597
598         if (rte_tm_supported()) {
599                 printf("Hardware transactional memory (lock elision) "
600                         "is supported\n");
601
602                 printf("Test read-write with Hardware transactional memory\n");
603
604                 use_htm = 1;
605                 if (test_hash_readwrite_functional(use_htm) < 0)
606                         return -1;
607
608                 reader_faster = 1;
609                 if (test_hash_readwrite_perf(&htm_results, use_htm,
610                                                         reader_faster) < 0)
611                         return -1;
612
613                 reader_faster = 0;
614                 if (test_hash_readwrite_perf(&htm_results, use_htm,
615                                                         reader_faster) < 0)
616                         return -1;
617         } else {
618                 printf("Hardware transactional memory (lock elision) "
619                         "is NOT supported\n");
620         }
621
622         printf("Test read-write without Hardware transactional memory\n");
623         use_htm = 0;
624         if (test_hash_readwrite_functional(use_htm) < 0)
625                 return -1;
626         reader_faster = 1;
627         if (test_hash_readwrite_perf(&non_htm_results, use_htm,
628                                                         reader_faster) < 0)
629                 return -1;
630         reader_faster = 0;
631         if (test_hash_readwrite_perf(&non_htm_results, use_htm,
632                                                         reader_faster) < 0)
633                 return -1;
634
635         printf("Results summary:\n");
636
637         printf("single read: %u\n", htm_results.single_read);
638         printf("single write: %u\n", htm_results.single_write);
639         for (i = 0; i < NUM_TEST; i++) {
640                 printf("core_cnt: %u\n", core_cnt[i]);
641                 printf("HTM:\n");
642                 printf("read only: %u\n", htm_results.read_only[i]);
643                 printf("write only: %u\n", htm_results.write_only[i]);
644                 printf("read-write read: %u\n", htm_results.read_write_r[i]);
645                 printf("read-write write: %u\n", htm_results.read_write_w[i]);
646
647                 printf("non HTM:\n");
648                 printf("read only: %u\n", non_htm_results.read_only[i]);
649                 printf("write only: %u\n", non_htm_results.write_only[i]);
650                 printf("read-write read: %u\n",
651                         non_htm_results.read_write_r[i]);
652                 printf("read-write write: %u\n",
653                         non_htm_results.read_write_w[i]);
654         }
655
656         return 0;
657 }
658
659 REGISTER_TEST_COMMAND(hash_readwrite_autotest, test_hash_readwrite_main);