From 28ebff11c2dc2d6cc862f6c19ad1b1ec64fbcc3a Mon Sep 17 00:00:00 2001 From: Vladimir Medvedkin Date: Mon, 19 Apr 2021 16:59:51 +0100 Subject: [PATCH] hash: add predictable RSS This patch adds predictable RSS API. It is based on the idea of searching partial Toeplitz hash collisions. Signed-off-by: Vladimir Medvedkin Acked-by: Konstantin Ananyev Acked-by: Yipeng Wang Acked-by: John McNamara --- MAINTAINERS | 1 + app/test/test_thash.c | 469 +++++- .../prog_guide/img/predictable_snat_1.svg | 1444 +++++++++++++++++ .../prog_guide/img/predictable_snat_2.svg | 1444 +++++++++++++++++ doc/guides/prog_guide/toeplitz_hash_lib.rst | 247 +++ doc/guides/rel_notes/release_21_05.rst | 6 + lib/librte_hash/meson.build | 3 +- lib/librte_hash/rte_thash.c | 682 ++++++++ lib/librte_hash/rte_thash.h | 223 +++ lib/librte_hash/version.map | 8 + 10 files changed, 4520 insertions(+), 7 deletions(-) create mode 100644 doc/guides/prog_guide/img/predictable_snat_1.svg create mode 100644 doc/guides/prog_guide/img/predictable_snat_2.svg create mode 100644 lib/librte_hash/rte_thash.c diff --git a/MAINTAINERS b/MAINTAINERS index aa4dbbc764..35a87e87f2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1430,6 +1430,7 @@ Hashes M: Yipeng Wang M: Sameh Gobriel M: Bruce Richardson +M: Vladimir Medvedkin F: lib/librte_hash/ F: doc/guides/prog_guide/hash_lib.rst F: doc/guides/prog_guide/toeplitz_hash_lib.rst diff --git a/app/test/test_thash.c b/app/test/test_thash.c index a6aadd1efa..d8981fbb7a 100644 --- a/app/test/test_thash.c +++ b/app/test/test_thash.c @@ -5,11 +5,15 @@ #include #include #include +#include #include "test.h" #include +#define HASH_MSK(reta_sz) ((1 << reta_sz) - 1) +#define TUPLE_SZ (RTE_THASH_V4_L4_LEN * 4) + struct test_thash_v4 { uint32_t dst_ip; uint32_t src_ip; @@ -75,7 +79,7 @@ uint8_t default_rss_key[] = { }; static int -test_thash(void) +test_toeplitz_hash_calc(void) { uint32_t i, j; union rte_thash_tuple tuple; @@ -100,7 +104,7 @@ test_thash(void) RTE_THASH_V4_L4_LEN, default_rss_key); if ((rss_l3 != v4_tbl[i].hash_l3) || (rss_l3l4 != v4_tbl[i].hash_l3l4)) - return -1; + return -TEST_FAILED; /*Calculate hash with converted key*/ rss_l3 = rte_softrss_be((uint32_t *)&tuple, RTE_THASH_V4_L3_LEN, rss_key_be); @@ -108,7 +112,7 @@ test_thash(void) RTE_THASH_V4_L4_LEN, rss_key_be); if ((rss_l3 != v4_tbl[i].hash_l3) || (rss_l3l4 != v4_tbl[i].hash_l3l4)) - return -1; + return -TEST_FAILED; } for (i = 0; i < RTE_DIM(v6_tbl); i++) { /*Fill ipv6 hdr*/ @@ -127,7 +131,7 @@ test_thash(void) RTE_THASH_V6_L4_LEN, default_rss_key); if ((rss_l3 != v6_tbl[i].hash_l3) || (rss_l3l4 != v6_tbl[i].hash_l3l4)) - return -1; + return -TEST_FAILED; /*Calculate hash with converted key*/ rss_l3 = rte_softrss_be((uint32_t *)&tuple, RTE_THASH_V6_L3_LEN, rss_key_be); @@ -135,9 +139,462 @@ test_thash(void) RTE_THASH_V6_L4_LEN, rss_key_be); if ((rss_l3 != v6_tbl[i].hash_l3) || (rss_l3l4 != v6_tbl[i].hash_l3l4)) - return -1; + return -TEST_FAILED; + } + return TEST_SUCCESS; +} + +static int +test_create_invalid(void) +{ + struct rte_thash_ctx *ctx; + int key_len = 40; + int reta_sz = 7; + + ctx = rte_thash_init_ctx(NULL, key_len, reta_sz, NULL, 0); + RTE_TEST_ASSERT(ctx == NULL, + "Call succeeded with invalid parameters\n"); + + ctx = rte_thash_init_ctx("test", 0, reta_sz, NULL, 0); + RTE_TEST_ASSERT(ctx == NULL, + "Call succeeded with invalid parameters\n"); + + ctx = rte_thash_init_ctx(NULL, key_len, 1, NULL, 0); + RTE_TEST_ASSERT(ctx == NULL, + "Call succeeded with invalid parameters\n"); + + ctx = rte_thash_init_ctx(NULL, key_len, 17, NULL, 0); + RTE_TEST_ASSERT(ctx == NULL, + "Call succeeded with invalid parameters\n"); + + return TEST_SUCCESS; +} + +static int +test_multiple_create(void) +{ + struct rte_thash_ctx *ctx; + int key_len = 40; + int reta_sz = 7; + int i; + + for (i = 0; i < 100; i++) { + ctx = rte_thash_init_ctx("test", key_len, reta_sz, NULL, 0); + RTE_TEST_ASSERT(ctx != NULL, "Can not create CTX\n"); + + rte_thash_free_ctx(ctx); + } + + return TEST_SUCCESS; +} + +static int +test_free_null(void) +{ + struct rte_thash_ctx *ctx; + + ctx = rte_thash_init_ctx("test", 40, 7, NULL, 0); + RTE_TEST_ASSERT(ctx != NULL, "Can not create CTX\n"); + + rte_thash_free_ctx(ctx); + rte_thash_free_ctx(NULL); + + return TEST_SUCCESS; +} + +static int +test_add_invalid_helper(void) +{ + struct rte_thash_ctx *ctx; + const int key_len = 40; + int reta_sz = 7; + int ret; + + ctx = rte_thash_init_ctx("test", key_len, reta_sz, NULL, 0); + RTE_TEST_ASSERT(ctx != NULL, "can not create thash ctx\n"); + + ret = rte_thash_add_helper(NULL, "test", reta_sz, 0); + RTE_TEST_ASSERT(ret == -EINVAL, + "Call succeeded with invalid parameters\n"); + + ret = rte_thash_add_helper(ctx, NULL, reta_sz, 0); + RTE_TEST_ASSERT(ret == -EINVAL, + "Call succeeded with invalid parameters\n"); + + ret = rte_thash_add_helper(ctx, "test", reta_sz - 1, 0); + RTE_TEST_ASSERT(ret == -EINVAL, + "Call succeeded with invalid parameters\n"); + + ret = rte_thash_add_helper(ctx, "test", reta_sz, key_len * 8); + RTE_TEST_ASSERT(ret == -EINVAL, + "Call succeeded with invalid parameters\n"); + + ret = rte_thash_add_helper(ctx, "first_range", reta_sz, 0); + RTE_TEST_ASSERT(ret == 0, "Can not create helper\n"); + + ret = rte_thash_add_helper(ctx, "first_range", reta_sz, 0); + RTE_TEST_ASSERT(ret == -EEXIST, + "Call succeeded with duplicated name\n"); + + /* + * Create second helper with offset 3 * reta_sz. + * Note first_range helper created range in key: + * [0, 32 + length{= reta_sz} - 1), i.e [0, 37). + * second range is [44, 81) + */ + ret = rte_thash_add_helper(ctx, "second_range", reta_sz, + 32 + 2 * reta_sz); + RTE_TEST_ASSERT(ret == 0, "Can not create helper\n"); + + /* + * Try to create overlapping with first_ and second_ ranges, + * i.e. [6, 49) + */ + ret = rte_thash_add_helper(ctx, "third_range", 2 * reta_sz, reta_sz); + RTE_TEST_ASSERT(ret == -EEXIST, + "Call succeeded with overlapping ranges\n"); + + rte_thash_free_ctx(ctx); + + return TEST_SUCCESS; +} + +static int +test_find_existing(void) +{ + struct rte_thash_ctx *ctx, *ret_ctx; + + ctx = rte_thash_init_ctx("test", 40, 7, NULL, 0); + RTE_TEST_ASSERT(ctx != NULL, "can not create thash ctx\n"); + + ret_ctx = rte_thash_find_existing("test"); + RTE_TEST_ASSERT(ret_ctx != NULL, "can not find existing ctx\n"); + + rte_thash_free_ctx(ctx); + + return TEST_SUCCESS; +} + +static int +test_get_helper(void) +{ + struct rte_thash_ctx *ctx; + struct rte_thash_subtuple_helper *h; + int ret; + + ctx = rte_thash_init_ctx("test", 40, 7, NULL, 0); + RTE_TEST_ASSERT(ctx != NULL, "Can not create thash ctx\n"); + + h = rte_thash_get_helper(NULL, "first_range"); + RTE_TEST_ASSERT(h == NULL, "Call succeeded with invalid parameters\n"); + + h = rte_thash_get_helper(ctx, NULL); + RTE_TEST_ASSERT(h == NULL, "Call succeeded with invalid parameters\n"); + + ret = rte_thash_add_helper(ctx, "first_range", 8, 0); + RTE_TEST_ASSERT(ret == 0, "Can not create helper\n"); + + h = rte_thash_get_helper(ctx, "first_range"); + RTE_TEST_ASSERT(h != NULL, "Can not find helper\n"); + + rte_thash_free_ctx(ctx); + + return TEST_SUCCESS; +} + +static int +test_period_overflow(void) +{ + struct rte_thash_ctx *ctx; + int reta_sz = 7; /* reflects polynomial degree */ + int ret; + + /* first create without RTE_THASH_IGNORE_PERIOD_OVERFLOW flag */ + ctx = rte_thash_init_ctx("test", 40, reta_sz, NULL, 0); + RTE_TEST_ASSERT(ctx != NULL, "Can not create thash ctx\n"); + + /* requested range > (2^reta_sz) - 1 */ + ret = rte_thash_add_helper(ctx, "test", (1 << reta_sz), 0); + RTE_TEST_ASSERT(ret == -ENOSPC, + "Call succeeded with invalid parameters\n"); + + /* requested range == len + 32 - 1, smaller than (2^reta_sz) - 1 */ + ret = rte_thash_add_helper(ctx, "test", (1 << reta_sz) - 32, 0); + RTE_TEST_ASSERT(ret == 0, "Can not create helper\n"); + + rte_thash_free_ctx(ctx); + + /* create with RTE_THASH_IGNORE_PERIOD_OVERFLOW flag */ + ctx = rte_thash_init_ctx("test", 40, reta_sz, NULL, + RTE_THASH_IGNORE_PERIOD_OVERFLOW); + RTE_TEST_ASSERT(ctx != NULL, "Can not create thash ctx\n"); + + /* requested range > (2^reta_sz - 1) */ + ret = rte_thash_add_helper(ctx, "test", (1 << reta_sz) + 10, 0); + RTE_TEST_ASSERT(ret == 0, "Can not create helper\n"); + + rte_thash_free_ctx(ctx); + + return TEST_SUCCESS; +} + +static int +test_predictable_rss_min_seq(void) +{ + struct rte_thash_ctx *ctx; + struct rte_thash_subtuple_helper *h; + const int key_len = 40; + int reta_sz = 6; + uint8_t initial_key[key_len]; + const uint8_t *new_key; + int ret; + union rte_thash_tuple tuple; + uint32_t orig_hash, adj_hash, adj; + unsigned int desired_value = 27 & HASH_MSK(reta_sz); + uint16_t port_value = 22; + + memset(initial_key, 0, key_len); + + ctx = rte_thash_init_ctx("test", key_len, reta_sz, initial_key, + RTE_THASH_MINIMAL_SEQ); + RTE_TEST_ASSERT(ctx != NULL, "can not create thash ctx\n"); + + ret = rte_thash_add_helper(ctx, "snat", sizeof(uint16_t) * 8, + offsetof(union rte_thash_tuple, v4.sport) * 8); + RTE_TEST_ASSERT(ret == 0, "can not add helper, ret %d\n", ret); + + h = rte_thash_get_helper(ctx, "snat"); + RTE_TEST_ASSERT(h != NULL, "can not find helper\n"); + + new_key = rte_thash_get_key(ctx); + tuple.v4.src_addr = RTE_IPV4(0, 0, 0, 0); + tuple.v4.dst_addr = RTE_IPV4(0, 0, 0, 0); + tuple.v4.sport = 0; + tuple.v4.sport = rte_cpu_to_be_16(port_value); + tuple.v4.dport = 0; + tuple.v4.sctp_tag = rte_be_to_cpu_32(tuple.v4.sctp_tag); + + orig_hash = rte_softrss((uint32_t *)&tuple, + RTE_THASH_V4_L4_LEN, new_key); + adj = rte_thash_get_complement(h, orig_hash, desired_value); + + tuple.v4.sctp_tag = rte_cpu_to_be_32(tuple.v4.sctp_tag); + tuple.v4.sport ^= rte_cpu_to_be_16(adj); + tuple.v4.sctp_tag = rte_be_to_cpu_32(tuple.v4.sctp_tag); + + adj_hash = rte_softrss((uint32_t *)&tuple, + RTE_THASH_V4_L4_LEN, new_key); + RTE_TEST_ASSERT((adj_hash & HASH_MSK(reta_sz)) == + desired_value, "bad desired value\n"); + + rte_thash_free_ctx(ctx); + + return TEST_SUCCESS; +} + +/* + * This test creates 7 subranges in the following order: + * range_one = [56, 95), len = 8, offset = 56 + * range_two = [64, 103), len = 8, offset = 64 + * range_three = [120, 159), len = 8, offset = 120 + * range_four = [48, 87), len = 8, offset = 48 + * range_five = [57, 95), len = 7, offset = 57 + * range_six = [40, 111), len = 40, offset = 40 + * range_seven = [0, 39), len = 8, offset = 0 + */ +struct range { + const char *name; + int len; + int offset; + int byte_idx; +}; + +struct range rng_arr[] = { + {"one", 8, 56, 7}, + {"two", 8, 64, 8}, + {"three", 8, 120, 15}, + {"four", 8, 48, 6}, + {"six", 40, 40, 9}, + {"five", 7, 57, 7}, + {"seven", 8, 0, 0} +}; + +static int +test_predictable_rss_multirange(void) +{ + struct rte_thash_ctx *ctx; + struct rte_thash_subtuple_helper *h[RTE_DIM(rng_arr)]; + const uint8_t *new_key; + const int key_len = 40; + int reta_sz = 7; + unsigned int i, j, k; + int ret; + uint32_t desired_value = rte_rand() & HASH_MSK(reta_sz); + uint8_t tuples[RTE_DIM(rng_arr)][16] = { {0} }; + uint32_t *ptr; + uint32_t hashes[RTE_DIM(rng_arr)]; + uint32_t adj_hashes[RTE_DIM(rng_arr)]; + uint32_t adj; + + ctx = rte_thash_init_ctx("test", key_len, reta_sz, NULL, 0); + RTE_TEST_ASSERT(ctx != NULL, "can not create thash ctx\n"); + + for (i = 0; i < RTE_DIM(rng_arr); i++) { + ret = rte_thash_add_helper(ctx, rng_arr[i].name, + rng_arr[i].len, rng_arr[i].offset); + RTE_TEST_ASSERT(ret == 0, "can not add helper\n"); + + h[i] = rte_thash_get_helper(ctx, rng_arr[i].name); + RTE_TEST_ASSERT(h[i] != NULL, "can not find helper\n"); + } + new_key = rte_thash_get_key(ctx); + + /* + * calculate hashes, complements, then adjust keys with + * complements and recalsulate hashes + */ + for (i = 0; i < RTE_DIM(rng_arr); i++) { + for (k = 0; k < 100; k++) { + /* init with random keys */ + ptr = (uint32_t *)&tuples[i][0]; + for (j = 0; j < 4; j++) + ptr[j] = rte_rand(); + /* convert keys from BE to CPU byte order */ + for (j = 0; j < 4; j++) + ptr[j] = rte_be_to_cpu_32(ptr[j]); + + hashes[i] = rte_softrss(ptr, 4, new_key); + adj = rte_thash_get_complement(h[i], hashes[i], + desired_value); + /* convert back to BE to adjust the value */ + for (j = 0; j < 4; j++) + ptr[j] = rte_cpu_to_be_32(ptr[j]); + + tuples[i][rng_arr[i].byte_idx] ^= adj; + + for (j = 0; j < 4; j++) + ptr[j] = rte_be_to_cpu_32(ptr[j]); + + adj_hashes[i] = rte_softrss(ptr, 4, new_key); + RTE_TEST_ASSERT((adj_hashes[i] & HASH_MSK(reta_sz)) == + desired_value, + "bad desired value for %d tuple\n", i); + } } - return 0; + + rte_thash_free_ctx(ctx); + + return TEST_SUCCESS; +} + +static int +cmp_tuple_eq(void *userdata, uint8_t *tuple) +{ + return memcmp(userdata, tuple, TUPLE_SZ); +} + +static int +test_adjust_tuple(void) +{ + struct rte_thash_ctx *ctx; + struct rte_thash_subtuple_helper *h; + const int key_len = 40; + const uint8_t *new_key; + uint8_t tuple[TUPLE_SZ]; + uint32_t tmp_tuple[TUPLE_SZ / sizeof(uint32_t)]; + uint32_t tuple_copy[TUPLE_SZ / sizeof(uint32_t)]; + uint32_t hash; + int reta_sz = CHAR_BIT; + int ret; + unsigned int i, desired_value = rte_rand() & HASH_MSK(reta_sz); + + memset(tuple, 0xab, TUPLE_SZ); + + ctx = rte_thash_init_ctx("test", key_len, reta_sz, NULL, 0); + RTE_TEST_ASSERT(ctx != NULL, "can not create thash ctx\n"); + + /* + * set offset to be in the middle of a byte + * set size of the subtuple to be 2 * rets_sz + * to have the room for random bits + */ + ret = rte_thash_add_helper(ctx, "test", reta_sz * 2, + (5 * CHAR_BIT) + 4); + RTE_TEST_ASSERT(ret == 0, "can not add helper, ret %d\n", ret); + + new_key = rte_thash_get_key(ctx); + + h = rte_thash_get_helper(ctx, "test"); + RTE_TEST_ASSERT(h != NULL, "can not find helper\n"); + + ret = rte_thash_adjust_tuple(ctx, h, tuple, TUPLE_SZ, desired_value, + 1, NULL, NULL); + RTE_TEST_ASSERT(ret == 0, "can not adjust tuple, ret %d\n", ret); + + for (i = 0; i < (TUPLE_SZ / 4); i++) + tmp_tuple[i] = + rte_be_to_cpu_32(*(uint32_t *)&tuple[i * 4]); + + hash = rte_softrss(tmp_tuple, TUPLE_SZ / 4, new_key); + RTE_TEST_ASSERT((hash & HASH_MSK(reta_sz)) == + desired_value, "bad desired value\n"); + + + /* Pass previously calculated tuple to callback function */ + memcpy(tuple_copy, tuple, TUPLE_SZ); + + memset(tuple, 0xab, TUPLE_SZ); + ret = rte_thash_adjust_tuple(ctx, h, tuple, TUPLE_SZ, desired_value, + 1, cmp_tuple_eq, tuple_copy); + RTE_TEST_ASSERT(ret == -EEXIST, + "adjust tuple didn't indicate collision\n"); + + /* + * Make the function to generate random bits into subtuple + * after first adjustment attempt. + */ + memset(tuple, 0xab, TUPLE_SZ); + ret = rte_thash_adjust_tuple(ctx, h, tuple, TUPLE_SZ, desired_value, + 2, cmp_tuple_eq, tuple_copy); + RTE_TEST_ASSERT(ret == 0, "can not adjust tuple, ret %d\n", ret); + + for (i = 0; i < (TUPLE_SZ / 4); i++) + tmp_tuple[i] = + rte_be_to_cpu_32(*(uint32_t *)&tuple[i * 4]); + + hash = rte_softrss(tmp_tuple, TUPLE_SZ / 4, new_key); + RTE_TEST_ASSERT((hash & HASH_MSK(reta_sz)) == + desired_value, "bad desired value\n"); + + rte_thash_free_ctx(ctx); + + return TEST_SUCCESS; +} + +static struct unit_test_suite thash_tests = { + .suite_name = "thash autotest", + .setup = NULL, + .teardown = NULL, + .unit_test_cases = { + TEST_CASE(test_toeplitz_hash_calc), + TEST_CASE(test_create_invalid), + TEST_CASE(test_multiple_create), + TEST_CASE(test_free_null), + TEST_CASE(test_add_invalid_helper), + TEST_CASE(test_find_existing), + TEST_CASE(test_get_helper), + TEST_CASE(test_period_overflow), + TEST_CASE(test_predictable_rss_min_seq), + TEST_CASE(test_predictable_rss_multirange), + TEST_CASE(test_adjust_tuple), + TEST_CASES_END() + } +}; + +static int +test_thash(void) +{ + return unit_test_suite_runner(&thash_tests); } REGISTER_TEST_COMMAND(thash_autotest, test_thash); diff --git a/doc/guides/prog_guide/img/predictable_snat_1.svg b/doc/guides/prog_guide/img/predictable_snat_1.svg new file mode 100644 index 0000000000..5f97ccb71d --- /dev/null +++ b/doc/guides/prog_guide/img/predictable_snat_1.svg @@ -0,0 +1,1444 @@ + + + +image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Page-4 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Router.1001 + + Sheet.1002 + + + + Sheet.1003 + + + + Sheet.1004 + + + + Sheet.1005 + + + + Sheet.1006 + + + + Sheet.1007 + + + + Sheet.1008 + + + + Sheet.1009 + + + + + + + + Array.1011 + + Sheet.1012 + + + + Sheet.1013 + + + + Sheet.1014 + + + + Sheet.1015 + + + + Sheet.1016 + 443 + + + + 443 + + + Sheet.1017 + 10000 + + + + 10000 + + + Sheet.1018 + 192.0.2.100 + + + + 192.0.2.100 + + + Sheet.1019 + 10.10.10.10 + + + + 10.10.10.10 + + + + + + + Array.1020 + + Sheet.1021 + + + + Sheet.1022 + + + + Sheet.1023 + + + + Sheet.1024 + + + + Sheet.1025 + 443 + + + + 443 + + + Sheet.1026 + 12345 + + + + 12345 + + + Sheet.1027 + 192.0.2.100 + + + + 192.0.2.100 + + + Sheet.1028 + 172.16.0.20 + + + + 172.16.0.20 + + + + Dynamic connector.1029 + + + + + + + + + + Array.1030 + + Sheet.1031 + + + + Sheet.1032 + + + + Sheet.1033 + + + + Sheet.1034 + + + + Sheet.1035 + 12345 + + + + 12345 + + + Sheet.1036 + 443 + + + + 443 + + + Sheet.1037 + 172.16.0.20 + + + + 172.16.0.20 + + + Sheet.1038 + 192.0.2.100 + + + + 192.0.2.100 + + + + + + + Array.1039 + + Sheet.1040 + + + + Sheet.1041 + + + + Sheet.1042 + + + + Sheet.1043 + + + + Sheet.1044 + 10000 + + + + 10000 + + + Sheet.1045 + 443 + + + + 443 + + + Sheet.1046 + 10.10.10.10 + + + + 10.10.10.10 + + + Sheet.1047 + 192.0.2.100 + + + + 192.0.2.100 + + + + Dynamic connector.1048 + + + + + + + Sheet.1049 + RSS hash value 0xdeadbeef Packet assigned to queue 15 + + + + RSS hash value 0xdeadbeefPacket assigned to queue 15 + + + Sheet.1051 + RSS hash value 0xbadcab1e Packet assigned to queue 14 + + + + RSS hash value 0xbadcab1ePacket assigned to queue 14 + + + \ No newline at end of file diff --git a/doc/guides/prog_guide/img/predictable_snat_2.svg b/doc/guides/prog_guide/img/predictable_snat_2.svg new file mode 100644 index 0000000000..85254590e1 --- /dev/null +++ b/doc/guides/prog_guide/img/predictable_snat_2.svg @@ -0,0 +1,1444 @@ + + + +image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Page-5 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Router.1001 + + Sheet.1002 + + + + Sheet.1003 + + + + Sheet.1004 + + + + Sheet.1005 + + + + Sheet.1006 + + + + Sheet.1007 + + + + Sheet.1008 + + + + Sheet.1009 + + + + + + + + Array.1011 + + Sheet.1012 + + + + Sheet.1013 + + + + Sheet.1014 + + + + Sheet.1015 + + + + Sheet.1016 + 443 + + + + 443 + + + Sheet.1017 + 10000 + + + + 10000 + + + Sheet.1018 + 192.0.2.100 + + + + 192.0.2.100 + + + Sheet.1019 + 10.10.10.10 + + + + 10.10.10.10 + + + + + + + Array.1020 + + Sheet.1021 + + + + Sheet.1022 + + + + Sheet.1023 + + + + Sheet.1024 + + + + Sheet.1025 + 443 + + + + 443 + + + Sheet.1026 + 23456 + + + + 23456 + + + Sheet.1027 + 192.0.2.100 + + + + 192.0.2.100 + + + Sheet.1028 + 172.16.0.20 + + + + 172.16.0.20 + + + + Dynamic connector.1029 + + + + + + + + + + Array.1030 + + Sheet.1031 + + + + Sheet.1032 + + + + Sheet.1033 + + + + Sheet.1034 + + + + Sheet.1035 + 23456 + + + + 23456 + + + Sheet.1036 + 443 + + + + 443 + + + Sheet.1037 + 172.16.0.20 + + + + 172.16.0.20 + + + Sheet.1038 + 192.0.2.100 + + + + 192.0.2.100 + + + + + + + Array.1039 + + Sheet.1040 + + + + Sheet.1041 + + + + Sheet.1042 + + + + Sheet.1043 + + + + Sheet.1044 + 10000 + + + + 10000 + + + Sheet.1045 + 443 + + + + 443 + + + Sheet.1046 + 10.10.10.10 + + + + 10.10.10.10 + + + Sheet.1047 + 192.0.2.100 + + + + 192.0.2.100 + + + + Dynamic connector.1048 + + + + + + + Sheet.1049 + RSS hash value 0xdeadbeef Packet assigned to queue 15 + + + + RSS hash value 0xdeadbeefPacket assigned to queue 15 + + + Sheet.1051 + RSS hash value 0xf00d1eaf Packet assigned to queue 15 + + + + RSS hash value 0xf00d1eafPacket assigned to queue 15 + + + \ No newline at end of file diff --git a/doc/guides/prog_guide/toeplitz_hash_lib.rst b/doc/guides/prog_guide/toeplitz_hash_lib.rst index 2480c551d0..83d5b0108f 100644 --- a/doc/guides/prog_guide/toeplitz_hash_lib.rst +++ b/doc/guides/prog_guide/toeplitz_hash_lib.rst @@ -36,3 +36,250 @@ The ``rte_softrss()`` function expects the ``rss_key`` to be exactly the same as the one installed on the NIC. The ``rte_softrss_be`` function is a faster implementation, but it expects ``rss_key`` to be converted to the host byte order. + + +Predictable RSS +--------------- + +In some usecases it is useful to have a way to find partial collisions of the +Toeplitz hash function. In figure :numref:`figure_rss_queue_assign` only a few +of the least significant bits (LSB) of the hash value are used to indicate an +entry in the RSS Redirection Table (ReTa) and thus the index of the queue. So, +in this case it would be useful to find another tuple whose hash has the same +LSB's as the hash from the original tuple. + +For example: + +- In the case of SNAT (Source Network Address Translation) it is possible to + find a special source port number on translation so that the hash of + returning packets, of the given connection, will have desired LSB's. +- In the case of MPLS (Multiprotocol Label Switching), if the MPLS tag is used + in the hash calculation, the Label Switching router can allocate a special + MPLS tag to bind an LSP (Label Switching Path) to a given queue. This method + can be used with the allocation of IPSec SPI, VXLan VNI, etc., to bind the + tunnel to the desired queue. +- In the case of a TCP stack, a special source port could be chosen for + outgoing connections, such that the response packets will be assigned to the + desired queue. + +This functionality is provided by the API shown below. +The API consists of 3 parts: + +* Create the thash context. + +* Create the thash helper, associated with a context. + +* Use the helper run time to calculate the adjustable bits of the tuple to + ensure a collision. + + +Thash context +~~~~~~~~~~~~~ + +The function ``rte_thash_init_ctx()`` initializes the context struct +associated with a particular NIC or a set of NICs. It expects: + +* The log2 value of the size of the RSS redirection table for the + corresponding NIC. It reflects the number of least significant bits of the + hash value to produce a collision for. + +* A predefined RSS hash key. This is optional, if ``NULL`` then a random key + will be initialized. + +* The length of the RSS hash key. This value is usually hardware/driver + specific and can be found in the NIC datasheet. + +* Optional flags, as shown below. + +Supported flags: + +* ``RTE_THASH_IGNORE_PERIOD_OVERFLOW`` - By default, and for security reasons, + the library prohibits generating a repeatable sequence in the hash key. This + flag disables such checking. The flag is mainly used for testing in the lab + to generate an RSS hash key with a uniform hash distribution, if the input + traffic also has a uniform distribution. + +* ``RTE_THASH_MINIMAL_SEQ`` - By default, the library generates a special bit + sequence in the hash key for all the bits of the subtuple. However, the + collision generation task requires only the ``log2(RETA_SZ)`` bits in the + subtuple. This flag forces the minimum bit sequence in the hash key to be + generated for the required ``log2(RETA_SZ)`` least significant bits of the + subtuple. The flag can be used in the case of a relatively large number of + helpers that may overlap with their corresponding bit sequences of RSS hash + keys. + + +Thash helper +~~~~~~~~~~~~ + +The function ``rte_thash_add_helper()`` initializes the helper struct +associated with a given context and a part of a target tuple of interest which +could be altered to produce a hash collision. On success it writes a specially +calculated bit sequence into the RSS hash key which is stored in the context +and calculates a table with values to be XORed with a subtuple. + +It expects: + +* A pointer to the Thash context to be associated with. + +* A length of the subtuple to be modified. The length is counted in bits. + +* An offset of the subtuple to be modified from the beginning of the tuple. It + is also counted in bits. + +.. note:: + + Adding a helper changes the key stored in the corresponding context. So the + updated RSS hash key must be uploaded into the NIC after creating all the + required helpers. + + +Calculation of the complementary bits to adjust the subtuple +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The ``rte_thash_get_complement()`` function returns a special bit sequence +with length ``N = log2(rss_reta_sz)`` (for the ``rss_reta_sz`` provided at +context initialization) to be xored with N least significant bits of the +subtuple. + +It expects: + +* A corresponding helper created for a given subtuple of the tuple. + +* A hash value of the tuple we want to alter. + +* The desired LSB's of the hash value the user expects to have. + +After the returned bit sequence has been XORed with the subtuple, the resulted +LSB's of the new hash value, calculated from the altered tuple, will be the +same as in ``desired_hash``. + + +Adjust tuple API +~~~~~~~~~~~~~~~~~ + +The ``rte_thash_get_complement()`` function is a user-friendly wrapper around +a number of other functions. It alters a passed tuple to meet the above +mentioned requirements around the desired hash LSB's. + +It expects: + +* A Thash context and helper. + +* A pointer to the tuple to be changed. + +* The length of the tuple. + +* A callback function and its userdata to check the tuple after it has been + changed. + +* The number of attempts to change the tuple. Basically, it makes sense if + there is a callback and a limit on the number of attempts to change the + tuple, if the callback function returns an error. + + +Usecase example +--------------- + +There could be a number of different usecases, such as NAT, TCP stack, MPLS +tag allocation, etc. In the following we will consider a SNAT application. + +Packets of a single bidirectional flow belonging to different directions can +end up being assigned to different queues and thus processed by different +lcores, as shown in :numref:`figure_predictable_snat_1`: + +.. _figure_predictable_snat_1: + +.. figure:: img/predictable_snat_1.* + + Bidirectional flow packets distribution in general + +That leads to a situation where the same packet flow can be shared between two +cores. Such a situation is not ideal from a performance perspective and +requires extra synchronization efforts that might lead to various performance +penalties, for example: + +* The connections table is global so locking/RCU on the flow insertion/removal + is required. + +* Connection metadata must be protected to avoid race conditions. + +* More cache pressure if a single connection metadata is kept in different + L1/L2 caches of a different CPU core. + +* Cache pressure/less cache locality on packet handover to the different cores. + +We can avoid all these penalties if it can be guaranteed that packets +belonging to one bidirectional flow will be assigned to the same queue, as +shown in :numref:`figure_predictable_snat_2`: + +.. _figure_predictable_snat_2: + +.. figure:: img/predictable_snat_2.* + + Bidirectional flow packets distribution with predictable RSS + + +To achieve this in a SNAT scenario it is possible to choose a source port not +randomly, but using the predictable RSS library to produce a partial hash +collision. This is shown in the code below. + +.. code-block:: c + + int key_len = 40; /* The default Niantic RSS key length. */ + + /** The default Niantic RSS reta size = 2^7 entries, LSBs of hash value are + * used as an indexes in RSS ReTa. */ + int reta_sz = 7; + int ret; + struct rte_thash_ctx *ctx; + + uint8_t initial_key[key_len] = {0}; /* Default empty key. */ + + /* Create and initialize a new thash context. */ + ctx = rte_thash_init_ctx("SNAT", key_len, reta_sz, initial_key, 0); + + /** Add a helper and specify the variable tuple part and its length. In the + * SNAT case we want to choose a new source port on SNAT translation in a + * way that the reverse tuple will have the same LSBs as the original + * direction tuple so that the selected source port will be the + * destination port on reply. + */ + ret = rte_thash_add_helper(ctx, "snat", sizeof(uint16_t) * 8, + offsetof(union rte_thash_tuple, v4.dport) * 8); + + if (ret != 0) + return ret; + + /* Get handler of the required helper. */ + struct rte_thash_subtuple_helper *h = rte_thash_get_helper(ctx, "snat"); + + /** After calling rte_thash_add_helper() the initial_key passed on ctx + * creation has been changed so we get the new one. + */ + uint8_t *new_key = rte_thash_get_key(ctx); + + union rte_thash_tuple tuple, rev_tuple; + + /* A complete tuple from the packet. */ + complete_tuple(mbuf, &tuple); + + /* Calculate the RSS hash or get it from mbuf->hash.rss. */ + uint32_t orig_hash = rte_softrss((uint32_t *)&tuple, RTE_THASH_V4_L4_LEN, new_key); + + /** Complete the reverse tuple by translating the SRC address and swapping + * src and dst addresses and ports. + */ + get_rev_tuple(&rev_tuple, &tuple, new_ip); + + /* Calculate the expected rss hash for the reverse tuple. */ + uint32_t rev_hash = rte_softrss((uint32_t *)&rev_tuple, RTE_THASH_V4_L4_LEN, new_key); + + /* Get the adjustment bits for the src port to get a new port. */ + uint32_t adj = rte_thash_get_compliment(h, rev_hash, orig_hash); + + /* Adjust the source port bits. */ + uint16_t new_sport = tuple.v4.sport ^ adj; + + /* Make an actual packet translation. */ + do_snat(mbuf, new_ip, new_sport); diff --git a/doc/guides/rel_notes/release_21_05.rst b/doc/guides/rel_notes/release_21_05.rst index 0ae862b684..565e764811 100644 --- a/doc/guides/rel_notes/release_21_05.rst +++ b/doc/guides/rel_notes/release_21_05.rst @@ -203,6 +203,12 @@ New Features the events across multiple stages. * This also reduced the scheduling overhead on a event device. +* **Added Predictable RSS functionality to the Toeplitz hash library.** + + Added feature for finding collisions of the Toeplitz hash function - + the hash function used in NICs to spread the traffic among the queues. + It can be used to get predictable mapping of the flows. + * **Updated testpmd.** * Added a command line option to configure forced speed for Ethernet port. diff --git a/lib/librte_hash/meson.build b/lib/librte_hash/meson.build index 242859f5ad..35460144d7 100644 --- a/lib/librte_hash/meson.build +++ b/lib/librte_hash/meson.build @@ -8,6 +8,7 @@ headers = files('rte_fbk_hash.h', 'rte_thash.h') indirect_headers += files('rte_crc_arm64.h') -sources = files('rte_cuckoo_hash.c', 'rte_fbk_hash.c') +sources = files('rte_cuckoo_hash.c', 'rte_fbk_hash.c', 'rte_thash.c') +deps += ['net'] deps += ['ring'] deps += ['rcu'] diff --git a/lib/librte_hash/rte_thash.c b/lib/librte_hash/rte_thash.c new file mode 100644 index 0000000000..135a26d444 --- /dev/null +++ b/lib/librte_hash/rte_thash.c @@ -0,0 +1,682 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2021 Intel Corporation + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define THASH_NAME_LEN 64 +#define TOEPLITZ_HASH_LEN 32 + +#define RETA_SZ_IN_RANGE(reta_sz) ((reta_sz >= RTE_THASH_RETA_SZ_MIN) &&\ + (reta_sz <= RTE_THASH_RETA_SZ_MAX)) + +TAILQ_HEAD(rte_thash_list, rte_tailq_entry); +static struct rte_tailq_elem rte_thash_tailq = { + .name = "RTE_THASH", +}; +EAL_REGISTER_TAILQ(rte_thash_tailq) + +/** + * Table of some irreducible polinomials over GF(2). + * For lfsr they are reperesented in BE bit order, and + * x^0 is masked out. + * For example, poly x^5 + x^2 + 1 will be represented + * as (101001b & 11111b) = 01001b = 0x9 + */ +static const uint32_t irreducible_poly_table[][4] = { + {0, 0, 0, 0}, /** < degree 0 */ + {1, 1, 1, 1}, /** < degree 1 */ + {0x3, 0x3, 0x3, 0x3}, /** < degree 2 and so on... */ + {0x5, 0x3, 0x5, 0x3}, + {0x9, 0x3, 0x9, 0x3}, + {0x9, 0x1b, 0xf, 0x5}, + {0x21, 0x33, 0x1b, 0x2d}, + {0x41, 0x11, 0x71, 0x9}, + {0x71, 0xa9, 0xf5, 0x8d}, + {0x21, 0xd1, 0x69, 0x1d9}, + {0x81, 0x2c1, 0x3b1, 0x185}, + {0x201, 0x541, 0x341, 0x461}, + {0x941, 0x609, 0xe19, 0x45d}, + {0x1601, 0x1f51, 0x1171, 0x359}, + {0x2141, 0x2111, 0x2db1, 0x2109}, + {0x4001, 0x801, 0x101, 0x7301}, + {0x7781, 0xa011, 0x4211, 0x86d9}, +}; + +struct thash_lfsr { + uint32_t ref_cnt; + uint32_t poly; + /**< polynomial associated with the lfsr */ + uint32_t rev_poly; + /**< polynomial to generate the sequence in reverse direction */ + uint32_t state; + /**< current state of the lfsr */ + uint32_t rev_state; + /**< current state of the lfsr for reverse direction */ + uint32_t deg; /**< polynomial degree*/ + uint32_t bits_cnt; /**< number of bits generated by lfsr*/ +}; + +struct rte_thash_subtuple_helper { + char name[THASH_NAME_LEN]; /** < Name of subtuple configuration */ + LIST_ENTRY(rte_thash_subtuple_helper) next; + struct thash_lfsr *lfsr; + uint32_t offset; /** < Offset of the m-sequence */ + uint32_t len; /** < Length of the m-sequence */ + uint32_t tuple_offset; /** < Offset in bits of the subtuple */ + uint32_t tuple_len; /** < Length in bits of the subtuple */ + uint32_t lsb_msk; /** < (1 << reta_sz_log) - 1 */ + __extension__ uint32_t compl_table[0] __rte_cache_aligned; + /** < Complementary table */ +}; + +struct rte_thash_ctx { + char name[THASH_NAME_LEN]; + LIST_HEAD(, rte_thash_subtuple_helper) head; + uint32_t key_len; /** < Length of the NIC RSS hash key */ + uint32_t reta_sz_log; /** < size of the RSS ReTa in bits */ + uint32_t subtuples_nb; /** < number of subtuples */ + uint32_t flags; + uint8_t hash_key[0]; +}; + +static inline uint32_t +get_bit_lfsr(struct thash_lfsr *lfsr) +{ + uint32_t bit, ret; + + /* + * masking the TAP bits defined by the polynomial and + * calculating parity + */ + bit = __builtin_popcount(lfsr->state & lfsr->poly) & 0x1; + ret = lfsr->state & 0x1; + lfsr->state = ((lfsr->state >> 1) | (bit << (lfsr->deg - 1))) & + ((1 << lfsr->deg) - 1); + + lfsr->bits_cnt++; + return ret; +} + +static inline uint32_t +get_rev_bit_lfsr(struct thash_lfsr *lfsr) +{ + uint32_t bit, ret; + + bit = __builtin_popcount(lfsr->rev_state & lfsr->rev_poly) & 0x1; + ret = lfsr->rev_state & (1 << (lfsr->deg - 1)); + lfsr->rev_state = ((lfsr->rev_state << 1) | bit) & + ((1 << lfsr->deg) - 1); + + lfsr->bits_cnt++; + return ret; +} + +static inline uint32_t +thash_get_rand_poly(uint32_t poly_degree) +{ + return irreducible_poly_table[poly_degree][rte_rand() % + RTE_DIM(irreducible_poly_table[poly_degree])]; +} + +static struct thash_lfsr * +alloc_lfsr(struct rte_thash_ctx *ctx) +{ + struct thash_lfsr *lfsr; + uint32_t i; + + if (ctx == NULL) + return NULL; + + lfsr = rte_zmalloc(NULL, sizeof(struct thash_lfsr), 0); + if (lfsr == NULL) + return NULL; + + lfsr->deg = ctx->reta_sz_log; + lfsr->poly = thash_get_rand_poly(lfsr->deg); + do { + lfsr->state = rte_rand() & ((1 << lfsr->deg) - 1); + } while (lfsr->state == 0); + /* init reverse order polynomial */ + lfsr->rev_poly = (lfsr->poly >> 1) | (1 << (lfsr->deg - 1)); + /* init proper rev_state*/ + lfsr->rev_state = lfsr->state; + for (i = 0; i <= lfsr->deg; i++) + get_rev_bit_lfsr(lfsr); + + /* clear bits_cnt after rev_state was inited */ + lfsr->bits_cnt = 0; + lfsr->ref_cnt = 1; + + return lfsr; +} + +static void +attach_lfsr(struct rte_thash_subtuple_helper *h, struct thash_lfsr *lfsr) +{ + lfsr->ref_cnt++; + h->lfsr = lfsr; +} + +static void +free_lfsr(struct thash_lfsr *lfsr) +{ + lfsr->ref_cnt--; + if (lfsr->ref_cnt == 0) + rte_free(lfsr); +} + +struct rte_thash_ctx * +rte_thash_init_ctx(const char *name, uint32_t key_len, uint32_t reta_sz, + uint8_t *key, uint32_t flags) +{ + struct rte_thash_ctx *ctx; + struct rte_tailq_entry *te; + struct rte_thash_list *thash_list; + uint32_t i; + + if ((name == NULL) || (key_len == 0) || !RETA_SZ_IN_RANGE(reta_sz)) { + rte_errno = EINVAL; + return NULL; + } + + thash_list = RTE_TAILQ_CAST(rte_thash_tailq.head, rte_thash_list); + + rte_mcfg_tailq_write_lock(); + + /* guarantee there's no existing */ + TAILQ_FOREACH(te, thash_list, next) { + ctx = (struct rte_thash_ctx *)te->data; + if (strncmp(name, ctx->name, sizeof(ctx->name)) == 0) + break; + } + ctx = NULL; + if (te != NULL) { + rte_errno = EEXIST; + goto exit; + } + + /* allocate tailq entry */ + te = rte_zmalloc("THASH_TAILQ_ENTRY", sizeof(*te), 0); + if (te == NULL) { + RTE_LOG(ERR, HASH, + "Can not allocate tailq entry for thash context %s\n", + name); + rte_errno = ENOMEM; + goto exit; + } + + ctx = rte_zmalloc(NULL, sizeof(struct rte_thash_ctx) + key_len, 0); + if (ctx == NULL) { + RTE_LOG(ERR, HASH, "thash ctx %s memory allocation failed\n", + name); + rte_errno = ENOMEM; + goto free_te; + } + + rte_strlcpy(ctx->name, name, sizeof(ctx->name)); + ctx->key_len = key_len; + ctx->reta_sz_log = reta_sz; + LIST_INIT(&ctx->head); + ctx->flags = flags; + + if (key) + rte_memcpy(ctx->hash_key, key, key_len); + else { + for (i = 0; i < key_len; i++) + ctx->hash_key[i] = rte_rand(); + } + + te->data = (void *)ctx; + TAILQ_INSERT_TAIL(thash_list, te, next); + + rte_mcfg_tailq_write_unlock(); + + return ctx; +free_te: + rte_free(te); +exit: + rte_mcfg_tailq_write_unlock(); + return NULL; +} + +struct rte_thash_ctx * +rte_thash_find_existing(const char *name) +{ + struct rte_thash_ctx *ctx; + struct rte_tailq_entry *te; + struct rte_thash_list *thash_list; + + thash_list = RTE_TAILQ_CAST(rte_thash_tailq.head, rte_thash_list); + + rte_mcfg_tailq_read_lock(); + TAILQ_FOREACH(te, thash_list, next) { + ctx = (struct rte_thash_ctx *)te->data; + if (strncmp(name, ctx->name, sizeof(ctx->name)) == 0) + break; + } + + rte_mcfg_tailq_read_unlock(); + + if (te == NULL) { + rte_errno = ENOENT; + return NULL; + } + + return ctx; +} + +void +rte_thash_free_ctx(struct rte_thash_ctx *ctx) +{ + struct rte_tailq_entry *te; + struct rte_thash_list *thash_list; + struct rte_thash_subtuple_helper *ent, *tmp; + + if (ctx == NULL) + return; + + thash_list = RTE_TAILQ_CAST(rte_thash_tailq.head, rte_thash_list); + rte_mcfg_tailq_write_lock(); + TAILQ_FOREACH(te, thash_list, next) { + if (te->data == (void *)ctx) + break; + } + + if (te != NULL) + TAILQ_REMOVE(thash_list, te, next); + + rte_mcfg_tailq_write_unlock(); + ent = LIST_FIRST(&(ctx->head)); + while (ent) { + free_lfsr(ent->lfsr); + tmp = ent; + ent = LIST_NEXT(ent, next); + LIST_REMOVE(tmp, next); + rte_free(tmp); + } + + rte_free(ctx); + rte_free(te); +} + +static inline void +set_bit(uint8_t *ptr, uint32_t bit, uint32_t pos) +{ + uint32_t byte_idx = pos / CHAR_BIT; + /* index of the bit int byte, indexing starts from MSB */ + uint32_t bit_idx = (CHAR_BIT - 1) - (pos & (CHAR_BIT - 1)); + uint8_t tmp; + + tmp = ptr[byte_idx]; + tmp &= ~(1 << bit_idx); + tmp |= bit << bit_idx; + ptr[byte_idx] = tmp; +} + +/** + * writes m-sequence to the hash_key for range [start, end] + * (i.e. including start and end positions) + */ +static int +generate_subkey(struct rte_thash_ctx *ctx, struct thash_lfsr *lfsr, + uint32_t start, uint32_t end) +{ + uint32_t i; + uint32_t req_bits = (start < end) ? (end - start) : (start - end); + req_bits++; /* due to including end */ + + /* check if lfsr overflow period of the m-sequence */ + if (((lfsr->bits_cnt + req_bits) > (1ULL << lfsr->deg) - 1) && + ((ctx->flags & RTE_THASH_IGNORE_PERIOD_OVERFLOW) != + RTE_THASH_IGNORE_PERIOD_OVERFLOW)) { + RTE_LOG(ERR, HASH, + "Can't generate m-sequence due to period overflow\n"); + return -ENOSPC; + } + + if (start < end) { + /* original direction (from left to right)*/ + for (i = start; i <= end; i++) + set_bit(ctx->hash_key, get_bit_lfsr(lfsr), i); + + } else { + /* reverse direction (from right to left) */ + for (i = end; i >= start; i--) + set_bit(ctx->hash_key, get_rev_bit_lfsr(lfsr), i); + } + + return 0; +} + +static inline uint32_t +get_subvalue(struct rte_thash_ctx *ctx, uint32_t offset) +{ + uint32_t *tmp, val; + + tmp = (uint32_t *)(&ctx->hash_key[offset >> 3]); + val = rte_be_to_cpu_32(*tmp); + val >>= (TOEPLITZ_HASH_LEN - ((offset & (CHAR_BIT - 1)) + + ctx->reta_sz_log)); + + return val & ((1 << ctx->reta_sz_log) - 1); +} + +static inline void +generate_complement_table(struct rte_thash_ctx *ctx, + struct rte_thash_subtuple_helper *h) +{ + int i, j, k; + uint32_t val; + uint32_t start; + + start = h->offset + h->len - (2 * ctx->reta_sz_log - 1); + + for (i = 1; i < (1 << ctx->reta_sz_log); i++) { + val = 0; + for (j = i; j; j &= (j - 1)) { + k = rte_bsf32(j); + val ^= get_subvalue(ctx, start - k + + ctx->reta_sz_log - 1); + } + h->compl_table[val] = i; + } +} + +static inline int +insert_before(struct rte_thash_ctx *ctx, + struct rte_thash_subtuple_helper *ent, + struct rte_thash_subtuple_helper *cur_ent, + struct rte_thash_subtuple_helper *next_ent, + uint32_t start, uint32_t end, uint32_t range_end) +{ + int ret; + + if (end < cur_ent->offset) { + ent->lfsr = alloc_lfsr(ctx); + if (ent->lfsr == NULL) { + rte_free(ent); + return -ENOMEM; + } + /* generate nonoverlapping range [start, end) */ + ret = generate_subkey(ctx, ent->lfsr, start, end - 1); + if (ret != 0) { + free_lfsr(ent->lfsr); + rte_free(ent); + return ret; + } + } else if ((next_ent != NULL) && (end > next_ent->offset)) { + rte_free(ent); + RTE_LOG(ERR, HASH, + "Can't add helper %s due to conflict with existing" + " helper %s\n", ent->name, next_ent->name); + return -ENOSPC; + } + attach_lfsr(ent, cur_ent->lfsr); + + /** + * generate partially overlapping range + * [start, cur_ent->start) in reverse order + */ + ret = generate_subkey(ctx, ent->lfsr, cur_ent->offset - 1, start); + if (ret != 0) { + free_lfsr(ent->lfsr); + rte_free(ent); + return ret; + } + + if (end > range_end) { + /** + * generate partially overlapping range + * (range_end, end) + */ + ret = generate_subkey(ctx, ent->lfsr, range_end, end - 1); + if (ret != 0) { + free_lfsr(ent->lfsr); + rte_free(ent); + return ret; + } + } + + LIST_INSERT_BEFORE(cur_ent, ent, next); + generate_complement_table(ctx, ent); + ctx->subtuples_nb++; + return 0; +} + +static inline int +insert_after(struct rte_thash_ctx *ctx, + struct rte_thash_subtuple_helper *ent, + struct rte_thash_subtuple_helper *cur_ent, + struct rte_thash_subtuple_helper *next_ent, + struct rte_thash_subtuple_helper *prev_ent, + uint32_t end, uint32_t range_end) +{ + int ret; + + if ((next_ent != NULL) && (end > next_ent->offset)) { + rte_free(ent); + RTE_LOG(ERR, HASH, + "Can't add helper %s due to conflict with existing" + " helper %s\n", ent->name, next_ent->name); + return -EEXIST; + } + + attach_lfsr(ent, cur_ent->lfsr); + if (end > range_end) { + /** + * generate partially overlapping range + * (range_end, end) + */ + ret = generate_subkey(ctx, ent->lfsr, range_end, end - 1); + if (ret != 0) { + free_lfsr(ent->lfsr); + rte_free(ent); + return ret; + } + } + + LIST_INSERT_AFTER(prev_ent, ent, next); + generate_complement_table(ctx, ent); + ctx->subtuples_nb++; + + return 0; +} + +int +rte_thash_add_helper(struct rte_thash_ctx *ctx, const char *name, uint32_t len, + uint32_t offset) +{ + struct rte_thash_subtuple_helper *ent, *cur_ent, *prev_ent, *next_ent; + uint32_t start, end; + int ret; + + if ((ctx == NULL) || (name == NULL) || (len < ctx->reta_sz_log) || + ((offset + len + TOEPLITZ_HASH_LEN - 1) > + ctx->key_len * CHAR_BIT)) + return -EINVAL; + + /* Check for existing name*/ + LIST_FOREACH(cur_ent, &ctx->head, next) { + if (strncmp(name, cur_ent->name, sizeof(cur_ent->name)) == 0) + return -EEXIST; + } + + end = offset + len + TOEPLITZ_HASH_LEN - 1; + start = ((ctx->flags & RTE_THASH_MINIMAL_SEQ) == + RTE_THASH_MINIMAL_SEQ) ? (end - (2 * ctx->reta_sz_log - 1)) : + offset; + + ent = rte_zmalloc(NULL, sizeof(struct rte_thash_subtuple_helper) + + sizeof(uint32_t) * (1 << ctx->reta_sz_log), + RTE_CACHE_LINE_SIZE); + if (ent == NULL) + return -ENOMEM; + + rte_strlcpy(ent->name, name, sizeof(ent->name)); + ent->offset = start; + ent->len = end - start; + ent->tuple_offset = offset; + ent->tuple_len = len; + ent->lsb_msk = (1 << ctx->reta_sz_log) - 1; + + cur_ent = LIST_FIRST(&ctx->head); + while (cur_ent) { + uint32_t range_end = cur_ent->offset + cur_ent->len; + next_ent = LIST_NEXT(cur_ent, next); + prev_ent = cur_ent; + /* Iterate through overlapping ranges */ + while ((next_ent != NULL) && (next_ent->offset < range_end)) { + range_end = RTE_MAX(next_ent->offset + next_ent->len, + range_end); + if (start > next_ent->offset) + prev_ent = next_ent; + + next_ent = LIST_NEXT(next_ent, next); + } + + if (start < cur_ent->offset) + return insert_before(ctx, ent, cur_ent, next_ent, + start, end, range_end); + else if (start < range_end) + return insert_after(ctx, ent, cur_ent, next_ent, + prev_ent, end, range_end); + + cur_ent = next_ent; + continue; + } + + ent->lfsr = alloc_lfsr(ctx); + if (ent->lfsr == NULL) { + rte_free(ent); + return -ENOMEM; + } + + /* generate nonoverlapping range [start, end) */ + ret = generate_subkey(ctx, ent->lfsr, start, end - 1); + if (ret != 0) { + free_lfsr(ent->lfsr); + rte_free(ent); + return ret; + } + if (LIST_EMPTY(&ctx->head)) { + LIST_INSERT_HEAD(&ctx->head, ent, next); + } else { + LIST_FOREACH(next_ent, &ctx->head, next) + prev_ent = next_ent; + + LIST_INSERT_AFTER(prev_ent, ent, next); + } + generate_complement_table(ctx, ent); + ctx->subtuples_nb++; + + return 0; +} + +struct rte_thash_subtuple_helper * +rte_thash_get_helper(struct rte_thash_ctx *ctx, const char *name) +{ + struct rte_thash_subtuple_helper *ent; + + if ((ctx == NULL) || (name == NULL)) + return NULL; + + LIST_FOREACH(ent, &ctx->head, next) { + if (strncmp(name, ent->name, sizeof(ent->name)) == 0) + return ent; + } + + return NULL; +} + +uint32_t +rte_thash_get_complement(struct rte_thash_subtuple_helper *h, + uint32_t hash, uint32_t desired_hash) +{ + return h->compl_table[(hash ^ desired_hash) & h->lsb_msk]; +} + +const uint8_t * +rte_thash_get_key(struct rte_thash_ctx *ctx) +{ + return ctx->hash_key; +} + +static inline void +xor_bit(uint8_t *ptr, uint32_t bit, uint32_t pos) +{ + uint32_t byte_idx = pos >> 3; + uint32_t bit_idx = (CHAR_BIT - 1) - (pos & (CHAR_BIT - 1)); + uint8_t tmp; + + tmp = ptr[byte_idx]; + tmp ^= bit << bit_idx; + ptr[byte_idx] = tmp; +} + +int +rte_thash_adjust_tuple(struct rte_thash_ctx *ctx, + struct rte_thash_subtuple_helper *h, + uint8_t *tuple, unsigned int tuple_len, + uint32_t desired_value, unsigned int attempts, + rte_thash_check_tuple_t fn, void *userdata) +{ + uint32_t tmp_tuple[tuple_len / sizeof(uint32_t)]; + unsigned int i, j, ret = 0; + uint32_t hash, adj_bits; + uint8_t bit; + const uint8_t *hash_key; + + if ((ctx == NULL) || (h == NULL) || (tuple == NULL) || + (tuple_len % sizeof(uint32_t) != 0) || (attempts <= 0)) + return -EINVAL; + + hash_key = rte_thash_get_key(ctx); + + for (i = 0; i < attempts; i++) { + for (j = 0; j < (tuple_len / 4); j++) + tmp_tuple[j] = + rte_be_to_cpu_32(*(uint32_t *)&tuple[j * 4]); + + hash = rte_softrss(tmp_tuple, tuple_len / 4, hash_key); + adj_bits = rte_thash_get_complement(h, hash, desired_value); + + /* + * Hint: LSB of adj_bits corresponds to + * offset + len bit of tuple + */ + for (j = 0; j < sizeof(uint32_t) * CHAR_BIT; j++) { + bit = (adj_bits >> j) & 0x1; + if (bit) + xor_bit(tuple, bit, h->tuple_offset + + h->tuple_len - 1 - j); + } + + if (fn != NULL) { + ret = (fn(userdata, tuple)) ? 0 : -EEXIST; + if (ret == 0) + return 0; + else if (i < (attempts - 1)) { + /* Update tuple with random bits */ + for (j = 0; j < h->tuple_len; j++) { + bit = rte_rand() & 0x1; + if (bit) + xor_bit(tuple, bit, + h->tuple_offset + + h->tuple_len - 1 - j); + } + } + } else + return 0; + } + + return ret; +} diff --git a/lib/librte_hash/rte_thash.h b/lib/librte_hash/rte_thash.h index 061efa2ae1..76109fcdb0 100644 --- a/lib/librte_hash/rte_thash.h +++ b/lib/librte_hash/rte_thash.h @@ -1,5 +1,6 @@ /* SPDX-License-Identifier: BSD-3-Clause * Copyright(c) 2015-2019 Vladimir Medvedkin + * Copyright(c) 2021 Intel Corporation */ #ifndef _RTE_THASH_H @@ -222,6 +223,228 @@ rte_softrss_be(uint32_t *input_tuple, uint32_t input_len, return ret; } +/** @internal Logarithm of minimum size of the RSS ReTa */ +#define RTE_THASH_RETA_SZ_MIN 2U +/** @internal Logarithm of maximum size of the RSS ReTa */ +#define RTE_THASH_RETA_SZ_MAX 16U + +/** + * LFSR will ignore if generated m-sequence has more than 2^n -1 bits, + * where n is the logarithm of the RSS ReTa size. + */ +#define RTE_THASH_IGNORE_PERIOD_OVERFLOW 0x1 +/** + * Generate minimal required bit (equal to ReTa LSB) sequence into + * the hash_key + */ +#define RTE_THASH_MINIMAL_SEQ 0x2 + +/** @internal thash context structure. */ +struct rte_thash_ctx; +/** @internal thash helper structure. */ +struct rte_thash_subtuple_helper; + +/** + * Create a new thash context. + * + * @warning + * @b EXPERIMENTAL: this API may change without prior notice. + * + * @param name + * Context name + * @param key_len + * Length of the toeplitz hash key + * @param reta_sz + * Logarithm of the NIC's Redirection Table (ReTa) size, + * i.e. number of the LSBs if the hash used to determine + * the reta entry. + * @param key + * Pointer to the key used to init an internal key state. + * Could be NULL, in this case internal key will be inited with random. + * @param flags + * Supported flags are: + * RTE_THASH_IGNORE_PERIOD_OVERFLOW + * RTE_THASH_MINIMAL_SEQ + * @return + * A pointer to the created context on success + * NULL otherwise + */ +__rte_experimental +struct rte_thash_ctx * +rte_thash_init_ctx(const char *name, uint32_t key_len, uint32_t reta_sz, + uint8_t *key, uint32_t flags); + +/** + * Find an existing thash context and return a pointer to it. + * + * @warning + * @b EXPERIMENTAL: this API may change without prior notice. + * + * @param name + * Name of the thash context + * @return + * Pointer to the thash context or NULL if it was not found with rte_errno + * set appropriately. Possible rte_errno values include: + * - ENOENT - required entry not available to return. + */ +__rte_experimental +struct rte_thash_ctx * +rte_thash_find_existing(const char *name); + +/** + * Free a thash context object + * + * @warning + * @b EXPERIMENTAL: this API may change without prior notice. + * + * @param ctx + * Thash context + * @return + * None + */ +__rte_experimental +void +rte_thash_free_ctx(struct rte_thash_ctx *ctx); + +/** + * Add a special properties to the toeplitz hash key inside a thash context. + * Creates an internal helper struct which has a complementary table + * to calculate toeplitz hash collisions. + * This function is not multi-thread safe. + * + * @warning + * @b EXPERIMENTAL: this API may change without prior notice. + * + * @param ctx + * Thash context + * @param name + * Name of the helper + * @param len + * Length in bits of the target subtuple + * Must be no shorter than reta_sz passed on rte_thash_init_ctx(). + * @param offset + * Offset in bits of the subtuple + * @return + * 0 on success + * negative on error + */ +__rte_experimental +int +rte_thash_add_helper(struct rte_thash_ctx *ctx, const char *name, uint32_t len, + uint32_t offset); + +/** + * Find a helper in the context by the given name + * + * @warning + * @b EXPERIMENTAL: this API may change without prior notice. + * + * @param ctx + * Thash context + * @param name + * Name of the helper + * @return + * Pointer to the thash helper or NULL if it was not found. + */ +__rte_experimental +struct rte_thash_subtuple_helper * +rte_thash_get_helper(struct rte_thash_ctx *ctx, const char *name); + +/** + * Get a complementary value for the subtuple to produce a + * partial toeplitz hash collision. It must be XOR'ed with the + * subtuple to produce the hash value with the desired hash LSB's + * This function is multi-thread safe. + * + * @param h + * Pointer to the helper struct + * @param hash + * Toeplitz hash value calculated for the given tuple + * @param desired_hash + * Desired hash value to find a collision for + * @return + * A complementary value which must be xored with the corresponding subtuple + */ +__rte_experimental +uint32_t +rte_thash_get_complement(struct rte_thash_subtuple_helper *h, + uint32_t hash, uint32_t desired_hash); + +/** + * Get a pointer to the toeplitz hash contained in the context. + * It changes after each addition of a helper. It should be installed to + * the NIC. + * + * @warning + * @b EXPERIMENTAL: this API may change without prior notice. + * + * @param ctx + * Thash context + * @return + * A pointer to the toeplitz hash key + */ +__rte_experimental +const uint8_t * +rte_thash_get_key(struct rte_thash_ctx *ctx); + +/** + * Function prototype for the rte_thash_adjust_tuple + * to check if adjusted tuple could be used. + * Generally it is some kind of lookup function to check + * if adjusted tuple is already in use. + * + * @warning + * @b EXPERIMENTAL: this API may change without prior notice. + * + * @param userdata + * Pointer to the userdata. It could be a pointer to the + * table with used tuples to search. + * @param tuple + * Pointer to the tuple to check + * + * @return + * 1 on success + * 0 otherwise + */ +typedef int (*rte_thash_check_tuple_t)(void *userdata, uint8_t *tuple); + +/** + * Adjusts tuple in the way to make Toeplitz hash has + * desired least significant bits. + * This function is multi-thread safe. + * + * @warning + * @b EXPERIMENTAL: this API may change without prior notice. + * + * @param ctx + * Thash context + * @param h + * Pointer to the helper struct + * @param tuple + * Pointer to the tuple to be adjusted + * @param tuple_len + * Length of the tuple. Must be multiple of 4. + * @param desired_value + * Desired value of least significant bits of the hash + * @param attempts + * Number of attempts to adjust tuple with fn() calling + * @param fn + * Callback function to check adjusted tuple. Could be NULL + * @param userdata + * Pointer to the userdata to be passed to fn(). Could be NULL + * + * @return + * 0 on success + * negative otherwise + */ +__rte_experimental +int +rte_thash_adjust_tuple(struct rte_thash_ctx *ctx, + struct rte_thash_subtuple_helper *h, + uint8_t *tuple, unsigned int tuple_len, + uint32_t desired_value, unsigned int attempts, + rte_thash_check_tuple_t fn, void *userdata); + #ifdef __cplusplus } #endif diff --git a/lib/librte_hash/version.map b/lib/librte_hash/version.map index c6d73080f4..9b9519745c 100644 --- a/lib/librte_hash/version.map +++ b/lib/librte_hash/version.map @@ -37,4 +37,12 @@ EXPERIMENTAL { rte_hash_lookup_with_hash_bulk_data; rte_hash_max_key_id; rte_hash_rcu_qsbr_add; + rte_thash_add_helper; + rte_thash_adjust_tuple; + rte_thash_find_existing; + rte_thash_free_ctx; + rte_thash_get_complement; + rte_thash_get_helper; + rte_thash_get_key; + rte_thash_init_ctx; }; -- 2.20.1