test/hash: fix r/w test with non-consecutive cores
[dpdk.git] / test / test / test_hash_readwrite.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2018 Intel Corporation
3  */
4
5 #include <inttypes.h>
6 #include <locale.h>
7
8 #include <rte_cycles.h>
9 #include <rte_hash.h>
10 #include <rte_hash_crc.h>
11 #include <rte_jhash.h>
12 #include <rte_launch.h>
13 #include <rte_malloc.h>
14 #include <rte_random.h>
15 #include <rte_spinlock.h>
16
17 #include "test.h"
18
19 #define RTE_RWTEST_FAIL 0
20
21 #define TOTAL_ENTRY (16*1024*1024)
22 #define TOTAL_INSERT (15*1024*1024)
23
24 #define NUM_TEST 3
25 unsigned int core_cnt[NUM_TEST] = {2, 4, 8};
26
27 unsigned int slave_core_ids[RTE_MAX_LCORE];
28 struct perf {
29         uint32_t single_read;
30         uint32_t single_write;
31         uint32_t read_only[NUM_TEST];
32         uint32_t write_only[NUM_TEST];
33         uint32_t read_write_r[NUM_TEST];
34         uint32_t read_write_w[NUM_TEST];
35 };
36
37 static struct perf htm_results, non_htm_results;
38
39 struct {
40         uint32_t *keys;
41         uint32_t *found;
42         uint32_t num_insert;
43         uint32_t rounded_tot_insert;
44         struct rte_hash *h;
45 } tbl_rw_test_param;
46
47 static rte_atomic64_t gcycles;
48 static rte_atomic64_t ginsertions;
49
50 static rte_atomic64_t gread_cycles;
51 static rte_atomic64_t gwrite_cycles;
52
53 static rte_atomic64_t greads;
54 static rte_atomic64_t gwrites;
55
56 static int
57 test_hash_readwrite_worker(__attribute__((unused)) void *arg)
58 {
59         uint64_t i, offset;
60         uint32_t lcore_id = rte_lcore_id();
61         uint64_t begin, cycles;
62         int ret;
63
64         for (i = 0; i < rte_lcore_count(); i++) {
65                 if (slave_core_ids[i] == lcore_id)
66                         break;
67         }
68         offset = tbl_rw_test_param.num_insert * i;
69
70         printf("Core #%d inserting and reading %d: %'"PRId64" - %'"PRId64"\n",
71                lcore_id, tbl_rw_test_param.num_insert,
72                offset, offset + tbl_rw_test_param.num_insert - 1);
73
74         begin = rte_rdtsc_precise();
75
76         for (i = offset; i < offset + tbl_rw_test_param.num_insert; i++) {
77
78                 if (rte_hash_lookup(tbl_rw_test_param.h,
79                                 tbl_rw_test_param.keys + i) > 0)
80                         break;
81
82                 ret = rte_hash_add_key(tbl_rw_test_param.h,
83                                      tbl_rw_test_param.keys + i);
84                 if (ret < 0)
85                         break;
86
87                 if (rte_hash_lookup(tbl_rw_test_param.h,
88                                 tbl_rw_test_param.keys + i) != ret)
89                         break;
90         }
91
92         cycles = rte_rdtsc_precise() - begin;
93         rte_atomic64_add(&gcycles, cycles);
94         rte_atomic64_add(&ginsertions, i - offset);
95
96         for (; i < offset + tbl_rw_test_param.num_insert; i++)
97                 tbl_rw_test_param.keys[i] = RTE_RWTEST_FAIL;
98
99         return 0;
100 }
101
102 static int
103 init_params(int use_htm, int use_jhash)
104 {
105         unsigned int i;
106
107         uint32_t *keys = NULL;
108         uint32_t *found = NULL;
109         struct rte_hash *handle;
110
111         struct rte_hash_parameters hash_params = {
112                 .entries = TOTAL_ENTRY,
113                 .key_len = sizeof(uint32_t),
114                 .hash_func_init_val = 0,
115                 .socket_id = rte_socket_id(),
116         };
117         if (use_jhash)
118                 hash_params.hash_func = rte_jhash;
119         else
120                 hash_params.hash_func = rte_hash_crc;
121
122         if (use_htm)
123                 hash_params.extra_flag =
124                         RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT |
125                         RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY;
126         else
127                 hash_params.extra_flag =
128                         RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY;
129
130         hash_params.name = "tests";
131
132         handle = rte_hash_create(&hash_params);
133         if (handle == NULL) {
134                 printf("hash creation failed");
135                 return -1;
136         }
137
138         tbl_rw_test_param.h = handle;
139         keys = rte_malloc(NULL, sizeof(uint32_t) * TOTAL_ENTRY, 0);
140
141         if (keys == NULL) {
142                 printf("RTE_MALLOC failed\n");
143                 goto err;
144         }
145
146         found = rte_zmalloc(NULL, sizeof(uint32_t) * TOTAL_ENTRY, 0);
147         if (found == NULL) {
148                 printf("RTE_ZMALLOC failed\n");
149                 goto err;
150         }
151
152         tbl_rw_test_param.keys = keys;
153         tbl_rw_test_param.found = found;
154
155         for (i = 0; i < TOTAL_ENTRY; i++)
156                 keys[i] = i;
157
158         return 0;
159
160 err:
161         rte_free(keys);
162         rte_hash_free(handle);
163
164         return -1;
165 }
166
167 static int
168 test_hash_readwrite_functional(int use_htm)
169 {
170         unsigned int i;
171         const void *next_key;
172         void *next_data;
173         uint32_t iter = 0;
174
175         uint32_t duplicated_keys = 0;
176         uint32_t lost_keys = 0;
177         int use_jhash = 1;
178         int slave_cnt = rte_lcore_count() - 1;
179
180         rte_atomic64_init(&gcycles);
181         rte_atomic64_clear(&gcycles);
182
183         rte_atomic64_init(&ginsertions);
184         rte_atomic64_clear(&ginsertions);
185
186         if (init_params(use_htm, use_jhash) != 0)
187                 goto err;
188
189         tbl_rw_test_param.num_insert =
190                 TOTAL_INSERT / slave_cnt;
191
192         tbl_rw_test_param.rounded_tot_insert =
193                 tbl_rw_test_param.num_insert
194                 * slave_cnt;
195
196         printf("++++++++Start function tests:+++++++++\n");
197
198         /* Fire all threads. */
199         rte_eal_mp_remote_launch(test_hash_readwrite_worker,
200                                  NULL, SKIP_MASTER);
201         rte_eal_mp_wait_lcore();
202
203         while (rte_hash_iterate(tbl_rw_test_param.h, &next_key,
204                         &next_data, &iter) >= 0) {
205                 /* Search for the key in the list of keys added .*/
206                 i = *(const uint32_t *)next_key;
207                 tbl_rw_test_param.found[i]++;
208         }
209
210         for (i = 0; i < tbl_rw_test_param.rounded_tot_insert; i++) {
211                 if (tbl_rw_test_param.keys[i] != RTE_RWTEST_FAIL) {
212                         if (tbl_rw_test_param.found[i] > 1) {
213                                 duplicated_keys++;
214                                 break;
215                         }
216                         if (tbl_rw_test_param.found[i] == 0) {
217                                 lost_keys++;
218                                 printf("key %d is lost\n", i);
219                                 break;
220                         }
221                 }
222         }
223
224         if (duplicated_keys > 0) {
225                 printf("%d key duplicated\n", duplicated_keys);
226                 goto err_free;
227         }
228
229         if (lost_keys > 0) {
230                 printf("%d key lost\n", lost_keys);
231                 goto err_free;
232         }
233
234         printf("No key corrupted during read-write test.\n");
235
236         unsigned long long int cycles_per_insertion =
237                 rte_atomic64_read(&gcycles) /
238                 rte_atomic64_read(&ginsertions);
239
240         printf("cycles per insertion and lookup: %llu\n", cycles_per_insertion);
241
242         rte_free(tbl_rw_test_param.found);
243         rte_free(tbl_rw_test_param.keys);
244         rte_hash_free(tbl_rw_test_param.h);
245         printf("+++++++++Complete function tests+++++++++\n");
246         return 0;
247
248 err_free:
249         rte_free(tbl_rw_test_param.found);
250         rte_free(tbl_rw_test_param.keys);
251         rte_hash_free(tbl_rw_test_param.h);
252 err:
253         return -1;
254 }
255
256 static int
257 test_rw_reader(void *arg)
258 {
259         uint64_t i;
260         uint64_t begin, cycles;
261         uint64_t read_cnt = (uint64_t)((uintptr_t)arg);
262
263         begin = rte_rdtsc_precise();
264         for (i = 0; i < read_cnt; i++) {
265                 void *data;
266                 rte_hash_lookup_data(tbl_rw_test_param.h,
267                                 tbl_rw_test_param.keys + i,
268                                 &data);
269                 if (i != (uint64_t)(uintptr_t)data) {
270                         printf("lookup find wrong value %"PRIu64","
271                                 "%"PRIu64"\n", i,
272                                 (uint64_t)(uintptr_t)data);
273                         break;
274                 }
275         }
276
277         cycles = rte_rdtsc_precise() - begin;
278         rte_atomic64_add(&gread_cycles, cycles);
279         rte_atomic64_add(&greads, i);
280         return 0;
281 }
282
283 static int
284 test_rw_writer(void *arg)
285 {
286         uint64_t i;
287         uint32_t lcore_id = rte_lcore_id();
288         uint64_t begin, cycles;
289         int ret;
290         uint64_t start_coreid = (uint64_t)(uintptr_t)arg;
291         uint64_t offset;
292
293         for (i = 0; i < rte_lcore_count(); i++) {
294                 if (slave_core_ids[i] == lcore_id)
295                         break;
296         }
297
298         offset = TOTAL_INSERT / 2 + (i - (start_coreid)) *
299                                 tbl_rw_test_param.num_insert;
300         begin = rte_rdtsc_precise();
301         for (i = offset; i < offset + tbl_rw_test_param.num_insert; i++) {
302                 ret = rte_hash_add_key_data(tbl_rw_test_param.h,
303                                 tbl_rw_test_param.keys + i,
304                                 (void *)((uintptr_t)i));
305                 if (ret < 0) {
306                         printf("writer failed %"PRIu64"\n", i);
307                         break;
308                 }
309         }
310
311         cycles = rte_rdtsc_precise() - begin;
312         rte_atomic64_add(&gwrite_cycles, cycles);
313         rte_atomic64_add(&gwrites, tbl_rw_test_param.num_insert);
314         return 0;
315 }
316
317 static int
318 test_hash_readwrite_perf(struct perf *perf_results, int use_htm,
319                                                         int reader_faster)
320 {
321         unsigned int n;
322         int ret;
323         int start_coreid;
324         uint64_t i, read_cnt;
325
326         const void *next_key;
327         void *next_data;
328         uint32_t iter = 0;
329         int use_jhash = 0;
330
331         uint32_t duplicated_keys = 0;
332         uint32_t lost_keys = 0;
333
334         uint64_t start = 0, end = 0;
335
336         rte_atomic64_init(&greads);
337         rte_atomic64_init(&gwrites);
338         rte_atomic64_clear(&gwrites);
339         rte_atomic64_clear(&greads);
340
341         rte_atomic64_init(&gread_cycles);
342         rte_atomic64_clear(&gread_cycles);
343         rte_atomic64_init(&gwrite_cycles);
344         rte_atomic64_clear(&gwrite_cycles);
345
346         if (init_params(use_htm, use_jhash) != 0)
347                 goto err;
348
349         /*
350          * Do a readers finish faster or writers finish faster test.
351          * When readers finish faster, we timing the readers, and when writers
352          * finish faster, we timing the writers.
353          * Divided by 10 or 2 is just experimental values to vary the workload
354          * of readers.
355          */
356         if (reader_faster) {
357                 printf("++++++Start perf test: reader++++++++\n");
358                 read_cnt = TOTAL_INSERT / 10;
359         } else {
360                 printf("++++++Start perf test: writer++++++++\n");
361                 read_cnt = TOTAL_INSERT / 2;
362         }
363
364         /* We first test single thread performance */
365         start = rte_rdtsc_precise();
366         /* Insert half of the keys */
367         for (i = 0; i < TOTAL_INSERT / 2; i++) {
368                 ret = rte_hash_add_key_data(tbl_rw_test_param.h,
369                                      tbl_rw_test_param.keys + i,
370                                         (void *)((uintptr_t)i));
371                 if (ret < 0) {
372                         printf("Failed to insert half of keys\n");
373                         goto err_free;
374                 }
375         }
376         end = rte_rdtsc_precise() - start;
377         perf_results->single_write = end / i;
378
379         start = rte_rdtsc_precise();
380
381         for (i = 0; i < read_cnt; i++) {
382                 void *data;
383                 rte_hash_lookup_data(tbl_rw_test_param.h,
384                                 tbl_rw_test_param.keys + i,
385                                 &data);
386                 if (i != (uint64_t)(uintptr_t)data) {
387                         printf("lookup find wrong value"
388                                         " %"PRIu64",%"PRIu64"\n", i,
389                                         (uint64_t)(uintptr_t)data);
390                         break;
391                 }
392         }
393         end = rte_rdtsc_precise() - start;
394         perf_results->single_read = end / i;
395
396         for (n = 0; n < NUM_TEST; n++) {
397                 unsigned int tot_slave_lcore = rte_lcore_count() - 1;
398                 if (tot_slave_lcore < core_cnt[n] * 2)
399                         goto finish;
400
401                 rte_atomic64_clear(&greads);
402                 rte_atomic64_clear(&gread_cycles);
403                 rte_atomic64_clear(&gwrites);
404                 rte_atomic64_clear(&gwrite_cycles);
405
406                 rte_hash_reset(tbl_rw_test_param.h);
407
408                 tbl_rw_test_param.num_insert = TOTAL_INSERT / 2 / core_cnt[n];
409                 tbl_rw_test_param.rounded_tot_insert = TOTAL_INSERT / 2 +
410                                                 tbl_rw_test_param.num_insert *
411                                                 core_cnt[n];
412
413                 for (i = 0; i < TOTAL_INSERT / 2; i++) {
414                         ret = rte_hash_add_key_data(tbl_rw_test_param.h,
415                                         tbl_rw_test_param.keys + i,
416                                         (void *)((uintptr_t)i));
417                         if (ret < 0) {
418                                 printf("Failed to insert half of keys\n");
419                                 goto err_free;
420                         }
421                 }
422
423                 /* Then test multiple thread case but only all reads or
424                  * all writes
425                  */
426
427                 /* Test only reader cases */
428                 for (i = 0; i < core_cnt[n]; i++)
429                         rte_eal_remote_launch(test_rw_reader,
430                                         (void *)(uintptr_t)read_cnt,
431                                         slave_core_ids[i]);
432
433                 rte_eal_mp_wait_lcore();
434
435                 start_coreid = i;
436                 /* Test only writer cases */
437                 for (; i < core_cnt[n] * 2; i++)
438                         rte_eal_remote_launch(test_rw_writer,
439                                         (void *)((uintptr_t)start_coreid),
440                                         slave_core_ids[i]);
441
442                 rte_eal_mp_wait_lcore();
443
444                 if (reader_faster) {
445                         unsigned long long int cycles_per_insertion =
446                                 rte_atomic64_read(&gread_cycles) /
447                                 rte_atomic64_read(&greads);
448                         perf_results->read_only[n] = cycles_per_insertion;
449                         printf("Reader only: cycles per lookup: %llu\n",
450                                                         cycles_per_insertion);
451                 }
452
453                 else {
454                         unsigned long long int cycles_per_insertion =
455                                 rte_atomic64_read(&gwrite_cycles) /
456                                 rte_atomic64_read(&gwrites);
457                         perf_results->write_only[n] = cycles_per_insertion;
458                         printf("Writer only: cycles per writes: %llu\n",
459                                                         cycles_per_insertion);
460                 }
461
462                 rte_atomic64_clear(&greads);
463                 rte_atomic64_clear(&gread_cycles);
464                 rte_atomic64_clear(&gwrites);
465                 rte_atomic64_clear(&gwrite_cycles);
466
467                 rte_hash_reset(tbl_rw_test_param.h);
468
469                 for (i = 0; i < TOTAL_INSERT / 2; i++) {
470                         ret = rte_hash_add_key_data(tbl_rw_test_param.h,
471                                         tbl_rw_test_param.keys + i,
472                                         (void *)((uintptr_t)i));
473                         if (ret < 0) {
474                                 printf("Failed to insert half of keys\n");
475                                 goto err_free;
476                         }
477                 }
478
479                 start_coreid = core_cnt[n];
480
481                 if (reader_faster) {
482                         for (i = core_cnt[n]; i < core_cnt[n] * 2; i++)
483                                 rte_eal_remote_launch(test_rw_writer,
484                                         (void *)((uintptr_t)start_coreid),
485                                         slave_core_ids[i]);
486                         for (i = 0; i < core_cnt[n]; i++)
487                                 rte_eal_remote_launch(test_rw_reader,
488                                         (void *)(uintptr_t)read_cnt,
489                                         slave_core_ids[i]);
490                 } else {
491                         for (i = 0; i < core_cnt[n]; i++)
492                                 rte_eal_remote_launch(test_rw_reader,
493                                         (void *)(uintptr_t)read_cnt,
494                                         slave_core_ids[i]);
495                         for (; i < core_cnt[n] * 2; i++)
496                                 rte_eal_remote_launch(test_rw_writer,
497                                         (void *)((uintptr_t)start_coreid),
498                                         slave_core_ids[i]);
499                 }
500
501                 rte_eal_mp_wait_lcore();
502
503                 while (rte_hash_iterate(tbl_rw_test_param.h,
504                                 &next_key, &next_data, &iter) >= 0) {
505                         /* Search for the key in the list of keys added .*/
506                         i = *(const uint32_t *)next_key;
507                         tbl_rw_test_param.found[i]++;
508                 }
509
510                 for (i = 0; i < tbl_rw_test_param.rounded_tot_insert; i++) {
511                         if (tbl_rw_test_param.keys[i] != RTE_RWTEST_FAIL) {
512                                 if (tbl_rw_test_param.found[i] > 1) {
513                                         duplicated_keys++;
514                                         break;
515                                 }
516                                 if (tbl_rw_test_param.found[i] == 0) {
517                                         lost_keys++;
518                                         printf("key %"PRIu64" is lost\n", i);
519                                         break;
520                                 }
521                         }
522                 }
523
524                 if (duplicated_keys > 0) {
525                         printf("%d key duplicated\n", duplicated_keys);
526                         goto err_free;
527                 }
528
529                 if (lost_keys > 0) {
530                         printf("%d key lost\n", lost_keys);
531                         goto err_free;
532                 }
533
534                 printf("No key corrupted during read-write test.\n");
535
536                 if (reader_faster) {
537                         unsigned long long int cycles_per_insertion =
538                                 rte_atomic64_read(&gread_cycles) /
539                                 rte_atomic64_read(&greads);
540                         perf_results->read_write_r[n] = cycles_per_insertion;
541                         printf("Read-write cycles per lookup: %llu\n",
542                                                         cycles_per_insertion);
543                 }
544
545                 else {
546                         unsigned long long int cycles_per_insertion =
547                                 rte_atomic64_read(&gwrite_cycles) /
548                                 rte_atomic64_read(&gwrites);
549                         perf_results->read_write_w[n] = cycles_per_insertion;
550                         printf("Read-write cycles per writes: %llu\n",
551                                                         cycles_per_insertion);
552                 }
553         }
554
555 finish:
556         rte_free(tbl_rw_test_param.found);
557         rte_free(tbl_rw_test_param.keys);
558         rte_hash_free(tbl_rw_test_param.h);
559         return 0;
560
561 err_free:
562         rte_free(tbl_rw_test_param.found);
563         rte_free(tbl_rw_test_param.keys);
564         rte_hash_free(tbl_rw_test_param.h);
565
566 err:
567         return -1;
568 }
569
570 static int
571 test_hash_readwrite_main(void)
572 {
573         /*
574          * Variables used to choose different tests.
575          * use_htm indicates if hardware transactional memory should be used.
576          * reader_faster indicates if the reader threads should finish earlier
577          * than writer threads. This is to timing either reader threads or
578          * writer threads for performance numbers.
579          */
580         int use_htm, reader_faster;
581         unsigned int i = 0, core_id = 0;
582
583         if (rte_lcore_count() <= 2) {
584                 printf("More than two lcores are required "
585                         "to do read write test\n");
586                 return 0;
587         }
588
589         RTE_LCORE_FOREACH_SLAVE(core_id) {
590                 slave_core_ids[i] = core_id;
591                 i++;
592         }
593
594         setlocale(LC_NUMERIC, "");
595
596         if (rte_tm_supported()) {
597                 printf("Hardware transactional memory (lock elision) "
598                         "is supported\n");
599
600                 printf("Test read-write with Hardware transactional memory\n");
601
602                 use_htm = 1;
603                 if (test_hash_readwrite_functional(use_htm) < 0)
604                         return -1;
605
606                 reader_faster = 1;
607                 if (test_hash_readwrite_perf(&htm_results, use_htm,
608                                                         reader_faster) < 0)
609                         return -1;
610
611                 reader_faster = 0;
612                 if (test_hash_readwrite_perf(&htm_results, use_htm,
613                                                         reader_faster) < 0)
614                         return -1;
615         } else {
616                 printf("Hardware transactional memory (lock elision) "
617                         "is NOT supported\n");
618         }
619
620         printf("Test read-write without Hardware transactional memory\n");
621         use_htm = 0;
622         if (test_hash_readwrite_functional(use_htm) < 0)
623                 return -1;
624         reader_faster = 1;
625         if (test_hash_readwrite_perf(&non_htm_results, use_htm,
626                                                         reader_faster) < 0)
627                 return -1;
628         reader_faster = 0;
629         if (test_hash_readwrite_perf(&non_htm_results, use_htm,
630                                                         reader_faster) < 0)
631                 return -1;
632
633         printf("Results summary:\n");
634
635         printf("single read: %u\n", htm_results.single_read);
636         printf("single write: %u\n", htm_results.single_write);
637         for (i = 0; i < NUM_TEST; i++) {
638                 printf("core_cnt: %u\n", core_cnt[i]);
639                 printf("HTM:\n");
640                 printf("read only: %u\n", htm_results.read_only[i]);
641                 printf("write only: %u\n", htm_results.write_only[i]);
642                 printf("read-write read: %u\n", htm_results.read_write_r[i]);
643                 printf("read-write write: %u\n", htm_results.read_write_w[i]);
644
645                 printf("non HTM:\n");
646                 printf("read only: %u\n", non_htm_results.read_only[i]);
647                 printf("write only: %u\n", non_htm_results.write_only[i]);
648                 printf("read-write read: %u\n",
649                         non_htm_results.read_write_r[i]);
650                 printf("read-write write: %u\n",
651                         non_htm_results.read_write_w[i]);
652         }
653
654         return 0;
655 }
656
/*
 * Associates test_hash_readwrite_main() with the name
 * hash_readwrite_autotest. REGISTER_TEST_COMMAND is not defined in this
 * file (presumably it comes from test.h - confirm there how the name is used).
 */
REGISTER_TEST_COMMAND(hash_readwrite_autotest, test_hash_readwrite_main);