table: add 16-byte hash operations computed on lookup
author: Fan Zhang <roy.fan.zhang@intel.com>
Wed, 28 Oct 2015 17:11:17 +0000 (17:11 +0000)
committer: Thomas Monjalon <thomas.monjalon@6wind.com>
Wed, 25 Nov 2015 23:45:50 +0000 (00:45 +0100)
This patch adds hash table operations with the key signature computed
on lookup ("do-sig") for LRU and extendible bucket hash tables.

Signed-off-by: Fan Zhang <roy.fan.zhang@intel.com>
Acked-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
lib/librte_table/rte_table_hash.h
lib/librte_table/rte_table_hash_key16.c

index e2c60e1..9d17516 100644 (file)
@@ -271,6 +271,10 @@ struct rte_table_hash_key16_lru_params {
 /** LRU hash table operations for pre-computed key signature */
 extern struct rte_table_ops rte_table_hash_key16_lru_ops;
 
+/** LRU hash table operations for key signature computed on lookup
+    ("do-sig") */
+extern struct rte_table_ops rte_table_hash_key16_lru_dosig_ops;
+
 /** Extendible bucket hash table parameters */
 struct rte_table_hash_key16_ext_params {
        /** Maximum number of entries (and keys) in the table */
@@ -301,6 +305,10 @@ struct rte_table_hash_key16_ext_params {
 /** Extendible bucket operations for pre-computed key signature */
 extern struct rte_table_ops rte_table_hash_key16_ext_ops;
 
+/** Extendible bucket hash table operations for key signature computed on
+    lookup ("do-sig") */
+extern struct rte_table_ops rte_table_hash_key16_ext_dosig_ops;
+
 /**
  * 32-byte key hash tables
  *
index 5ad63bc..a3a5023 100644 (file)
@@ -620,6 +620,27 @@ rte_table_hash_entry_delete_key16_ext(
        rte_prefetch0((void *)(((uintptr_t) bucket1) + RTE_CACHE_LINE_SIZE));\
 }
 
+#define lookup1_stage1_dosig(mbuf1, bucket1, f)                        \
+{                                                              \
+       uint64_t *key;                                          \
+       uint64_t signature = 0;                         \
+       uint32_t bucket_index;                          \
+       uint64_t hash_key_buffer[2];            \
+       /* do-sig: the signature is computed here, at lookup time */ \
+       key = RTE_MBUF_METADATA_UINT64_PTR(mbuf1, f->key_offset);\
+       /* Hash a copy of the key with f->key_mask applied */   \
+       hash_key_buffer[0] = key[0] & f->key_mask[0];   \
+       hash_key_buffer[1] = key[1] & f->key_mask[1];   \
+       signature = f->f_hash(hash_key_buffer,                  \
+                       RTE_TABLE_HASH_KEY_SIZE, f->seed);              \
+       /* Select bucket1; prefetch it and the cache line after it */ \
+       bucket_index = signature & (f->n_buckets - 1);          \
+       bucket1 = (struct rte_bucket_4_16 *)                    \
+               &f->memory[bucket_index * f->bucket_size];      \
+       rte_prefetch0(bucket1);                                 \
+       rte_prefetch0((void *)(((uintptr_t) bucket1) + RTE_CACHE_LINE_SIZE));\
+}
+
 #define lookup1_stage2_lru(pkt2_index, mbuf2, bucket2,         \
                pkts_mask_out, entries, f)                      \
 {                                                              \
@@ -769,6 +790,36 @@ rte_table_hash_entry_delete_key16_ext(
        rte_prefetch0((void *)(((uintptr_t) bucket11) + RTE_CACHE_LINE_SIZE));\
 }
 
+#define lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f)    \
+{                                                              \
+       uint64_t *key10, *key11;                                        \
+       uint64_t hash_offset_buffer[2];                         \
+       uint64_t signature10, signature11;                      \
+       uint32_t bucket10_index, bucket11_index;        \
+       /* First packet: hash the masked key, prefetch bucket10 */ \
+       key10 = RTE_MBUF_METADATA_UINT64_PTR(mbuf10, f->key_offset);\
+       hash_offset_buffer[0] = key10[0] & f->key_mask[0];      \
+       hash_offset_buffer[1] = key10[1] & f->key_mask[1];      \
+       signature10 = f->f_hash(hash_offset_buffer,                     \
+                       RTE_TABLE_HASH_KEY_SIZE, f->seed);\
+       bucket10_index = signature10 & (f->n_buckets - 1);      \
+       bucket10 = (struct rte_bucket_4_16 *)                           \
+               &f->memory[bucket10_index * f->bucket_size];    \
+       rte_prefetch0(bucket10);                                \
+       rte_prefetch0((void *)(((uintptr_t) bucket10) + RTE_CACHE_LINE_SIZE));\
+       /* Second packet: same steps, reusing hash_offset_buffer */ \
+       key11 = RTE_MBUF_METADATA_UINT64_PTR(mbuf11, f->key_offset);\
+       hash_offset_buffer[0] = key11[0] & f->key_mask[0];      \
+       hash_offset_buffer[1] = key11[1] & f->key_mask[1];      \
+       signature11 = f->f_hash(hash_offset_buffer,                     \
+                       RTE_TABLE_HASH_KEY_SIZE, f->seed);\
+       bucket11_index = signature11 & (f->n_buckets - 1);      \
+       bucket11 = (struct rte_bucket_4_16 *)                   \
+               &f->memory[bucket11_index * f->bucket_size];    \
+       rte_prefetch0(bucket11);                                \
+       rte_prefetch0((void *)(((uintptr_t) bucket11) + RTE_CACHE_LINE_SIZE));\
+}
+
 #define lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,\
                bucket20, bucket21, pkts_mask_out, entries, f)  \
 {                                                              \
@@ -878,7 +929,8 @@ rte_table_hash_lookup_key16_lru(
                }
 
                *lookup_hit_mask = pkts_mask_out;
-               RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - __builtin_popcountll(pkts_mask_out));
+               RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f,
+                       n_pkts_in - __builtin_popcountll(pkts_mask_out));
                return 0;
        }
 
@@ -968,10 +1020,140 @@ rte_table_hash_lookup_key16_lru(
                bucket20, bucket21, pkts_mask_out, entries, f);
 
        *lookup_hit_mask = pkts_mask_out;
-       RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - __builtin_popcountll(pkts_mask_out));
+       RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in -
+               __builtin_popcountll(pkts_mask_out));
        return 0;
 } /* rte_table_hash_lookup_key16_lru() */
 
+static int
+rte_table_hash_lookup_key16_lru_dosig(
+       void *table,
+       struct rte_mbuf **pkts,
+       uint64_t pkts_mask,
+       uint64_t *lookup_hit_mask,
+       void **entries)
+{
+       struct rte_table_hash *f = (struct rte_table_hash *) table;
+       struct rte_bucket_4_16 *bucket10, *bucket11, *bucket20, *bucket21;
+       struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21;
+       uint32_t pkt00_index, pkt01_index, pkt10_index;
+       uint32_t pkt11_index, pkt20_index, pkt21_index;
+       uint64_t pkts_mask_out = 0;
+       /* n_pkts_in is only read through the STATS macros below */
+       __rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
+
+       RTE_TABLE_HASH_KEY16_STATS_PKTS_IN_ADD(f, n_pkts_in);
+
+       /* Cannot run the pipeline with fewer than 5 packets: go one by one */
+       if (__builtin_popcountll(pkts_mask) < 5) {
+               for ( ; pkts_mask; ) {
+                       struct rte_bucket_4_16 *bucket;
+                       struct rte_mbuf *mbuf;
+                       uint32_t pkt_index;
+
+                       lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask);
+                       lookup1_stage1_dosig(mbuf, bucket, f);
+                       lookup1_stage2_lru(pkt_index, mbuf, bucket,
+                               pkts_mask_out, entries, f);
+               }
+
+               *lookup_hit_mask = pkts_mask_out;
+               RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in -
+                       __builtin_popcountll(pkts_mask_out));
+               return 0;
+       }
+
+       /*
+        * Pipeline fill
+        * (stage 0 twice and stage 1 once before any stage 2 work)
+        */
+       /* Pipeline stage 0 (first packet pair) */
+       lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
+               pkts_mask);
+
+       /* Pipeline feed: advance stage 0->1 */
+       mbuf10 = mbuf00;
+       mbuf11 = mbuf01;
+       pkt10_index = pkt00_index;
+       pkt11_index = pkt01_index;
+
+       /* Pipeline stage 0 (second packet pair) */
+       lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
+               pkts_mask);
+
+       /* Pipeline stage 1: hash the masked keys, prefetch the buckets */
+       lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f);
+
+       /*
+        * Pipeline run
+        * (runs while pkts_mask is non-zero)
+        */
+       for ( ; pkts_mask; ) {
+               /* Pipeline feed: advance stages 1->2 and 0->1 */
+               bucket20 = bucket10;
+               bucket21 = bucket11;
+               mbuf20 = mbuf10;
+               mbuf21 = mbuf11;
+               mbuf10 = mbuf00;
+               mbuf11 = mbuf01;
+               pkt20_index = pkt10_index;
+               pkt21_index = pkt11_index;
+               pkt10_index = pkt00_index;
+               pkt11_index = pkt01_index;
+
+               /* Pipeline stage 0 */
+               lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
+                       mbuf00, mbuf01, pkts, pkts_mask);
+
+               /* Pipeline stage 1: hash keys, prefetch buckets */
+               lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f);
+
+               /* Pipeline stage 2 */
+               lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
+                       bucket20, bucket21, pkts_mask_out, entries, f);
+       }
+
+       /*
+        * Pipeline flush
+        * (no stage 0 work left; drain stages 1 and 2)
+        */
+       /* Pipeline feed: advance stages 1->2 and 0->1 */
+       bucket20 = bucket10;
+       bucket21 = bucket11;
+       mbuf20 = mbuf10;
+       mbuf21 = mbuf11;
+       mbuf10 = mbuf00;
+       mbuf11 = mbuf01;
+       pkt20_index = pkt10_index;
+       pkt21_index = pkt11_index;
+       pkt10_index = pkt00_index;
+       pkt11_index = pkt01_index;
+
+       /* Pipeline stage 1: hash keys, prefetch buckets */
+       lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f);
+
+       /* Pipeline stage 2 */
+       lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
+               bucket20, bucket21, pkts_mask_out, entries, f);
+
+       /* Pipeline feed: advance stage 1->2 only */
+       bucket20 = bucket10;
+       bucket21 = bucket11;
+       mbuf20 = mbuf10;
+       mbuf21 = mbuf11;
+       pkt20_index = pkt10_index;
+       pkt21_index = pkt11_index;
+
+       /* Pipeline stage 2 */
+       lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
+               bucket20, bucket21, pkts_mask_out, entries, f);
+
+       *lookup_hit_mask = pkts_mask_out;
+       RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in -
+               __builtin_popcountll(pkts_mask_out));
+       return 0;
+} /* rte_table_hash_lookup_key16_lru_dosig() */
+
 static int
 rte_table_hash_lookup_key16_ext(
        void *table,
@@ -1118,10 +1300,163 @@ grind_next_buckets:
        }
 
        *lookup_hit_mask = pkts_mask_out;
-       RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - __builtin_popcountll(pkts_mask_out));
+       RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in -
+               __builtin_popcountll(pkts_mask_out));
        return 0;
 } /* rte_table_hash_lookup_key16_ext() */
 
+static int
+rte_table_hash_lookup_key16_ext_dosig(
+       void *table,
+       struct rte_mbuf **pkts,
+       uint64_t pkts_mask,
+       uint64_t *lookup_hit_mask,
+       void **entries)
+{
+       struct rte_table_hash *f = (struct rte_table_hash *) table;
+       struct rte_bucket_4_16 *bucket10, *bucket11, *bucket20, *bucket21;
+       struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21;
+       uint32_t pkt00_index, pkt01_index, pkt10_index;
+       uint32_t pkt11_index, pkt20_index, pkt21_index;
+       uint64_t pkts_mask_out = 0, buckets_mask = 0;
+       struct rte_bucket_4_16 *buckets[RTE_PORT_IN_BURST_SIZE_MAX];
+       uint64_t *keys[RTE_PORT_IN_BURST_SIZE_MAX];
+       /* n_pkts_in is only read through the STATS macros below */
+       __rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
+
+       RTE_TABLE_HASH_KEY16_STATS_PKTS_IN_ADD(f, n_pkts_in);
+
+       /* Cannot run the pipeline with fewer than 5 packets: go one by one */
+       if (__builtin_popcountll(pkts_mask) < 5) {
+               for ( ; pkts_mask; ) {
+                       struct rte_bucket_4_16 *bucket;
+                       struct rte_mbuf *mbuf;
+                       uint32_t pkt_index;
+
+                       lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask);
+                       lookup1_stage1_dosig(mbuf, bucket, f);
+                       lookup1_stage2_ext(pkt_index, mbuf, bucket,
+                               pkts_mask_out, entries, buckets_mask,
+                               buckets, keys, f);
+               }
+               /* Pipeline not used; continue with the grinding below */
+               goto grind_next_buckets;
+       }
+
+       /*
+        * Pipeline fill
+        * (stage 0 twice and stage 1 once before any stage 2 work)
+        */
+       /* Pipeline stage 0 (first packet pair) */
+       lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
+               pkts_mask);
+
+       /* Pipeline feed: advance stage 0->1 */
+       mbuf10 = mbuf00;
+       mbuf11 = mbuf01;
+       pkt10_index = pkt00_index;
+       pkt11_index = pkt01_index;
+
+       /* Pipeline stage 0 (second packet pair) */
+       lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
+               pkts_mask);
+
+       /* Pipeline stage 1: hash the masked keys, prefetch the buckets */
+       lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f);
+
+       /*
+        * Pipeline run
+        * (runs while pkts_mask is non-zero)
+        */
+       for ( ; pkts_mask; ) {
+               /* Pipeline feed: advance stages 1->2 and 0->1 */
+               bucket20 = bucket10;
+               bucket21 = bucket11;
+               mbuf20 = mbuf10;
+               mbuf21 = mbuf11;
+               mbuf10 = mbuf00;
+               mbuf11 = mbuf01;
+               pkt20_index = pkt10_index;
+               pkt21_index = pkt11_index;
+               pkt10_index = pkt00_index;
+               pkt11_index = pkt01_index;
+
+               /* Pipeline stage 0 */
+               lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
+                       mbuf00, mbuf01, pkts, pkts_mask);
+
+               /* Pipeline stage 1: hash keys, prefetch buckets */
+               lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f);
+
+               /* Pipeline stage 2 */
+               lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
+                       bucket20, bucket21, pkts_mask_out, entries,
+                       buckets_mask, buckets, keys, f);
+       }
+
+       /*
+        * Pipeline flush
+        * (no stage 0 work left; drain stages 1 and 2)
+        */
+       /* Pipeline feed: advance stages 1->2 and 0->1 */
+       bucket20 = bucket10;
+       bucket21 = bucket11;
+       mbuf20 = mbuf10;
+       mbuf21 = mbuf11;
+       mbuf10 = mbuf00;
+       mbuf11 = mbuf01;
+       pkt20_index = pkt10_index;
+       pkt21_index = pkt11_index;
+       pkt10_index = pkt00_index;
+       pkt11_index = pkt01_index;
+
+       /* Pipeline stage 1: hash keys, prefetch buckets */
+       lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f);
+
+       /* Pipeline stage 2 */
+       lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
+               bucket20, bucket21, pkts_mask_out, entries,
+               buckets_mask, buckets, keys, f);
+
+       /* Pipeline feed: advance stage 1->2 only */
+       bucket20 = bucket10;
+       bucket21 = bucket11;
+       mbuf20 = mbuf10;
+       mbuf21 = mbuf11;
+       pkt20_index = pkt10_index;
+       pkt21_index = pkt11_index;
+
+       /* Pipeline stage 2 */
+       lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
+               bucket20, bucket21, pkts_mask_out, entries,
+               buckets_mask, buckets, keys, f);
+
+grind_next_buckets:
+       /* Grind next buckets: loop until buckets_mask is empty */
+       for ( ; buckets_mask; ) {
+               uint64_t buckets_mask_next = 0;
+
+               for ( ; buckets_mask; ) {
+                       uint64_t pkt_mask;
+                       uint32_t pkt_index;
+
+                       pkt_index = __builtin_ctzll(buckets_mask);
+                       pkt_mask = 1LLU << pkt_index;
+                       buckets_mask &= ~pkt_mask;
+
+                       lookup_grinder(pkt_index, buckets, keys, pkts_mask_out,
+                               entries, buckets_mask_next, f);
+               }
+
+               buckets_mask = buckets_mask_next;
+       }
+
+       *lookup_hit_mask = pkts_mask_out;
+       RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in -
+               __builtin_popcountll(pkts_mask_out));
+       return 0;
+} /* rte_table_hash_lookup_key16_ext_dosig() */
+
 static int
 rte_table_hash_key16_stats_read(void *table, struct rte_table_stats *stats, int clear)
 {
@@ -1147,6 +1482,15 @@ struct rte_table_ops rte_table_hash_key16_lru_ops = {
        .f_stats = rte_table_hash_key16_stats_read,
 };
 
+struct rte_table_ops rte_table_hash_key16_lru_dosig_ops = {
+       .f_create = rte_table_hash_create_key16_lru,
+       .f_free = rte_table_hash_free_key16_lru,
+       .f_add = rte_table_hash_entry_add_key16_lru,
+       .f_delete = rte_table_hash_entry_delete_key16_lru,
+       .f_lookup = rte_table_hash_lookup_key16_lru_dosig, /* hashes the key */
+       .f_stats = rte_table_hash_key16_stats_read,
+};
+
 struct rte_table_ops rte_table_hash_key16_ext_ops = {
        .f_create = rte_table_hash_create_key16_ext,
        .f_free = rte_table_hash_free_key16_ext,
@@ -1157,3 +1501,12 @@ struct rte_table_ops rte_table_hash_key16_ext_ops = {
        .f_lookup = rte_table_hash_lookup_key16_ext,
        .f_stats = rte_table_hash_key16_stats_read,
 };
+
+struct rte_table_ops rte_table_hash_key16_ext_dosig_ops = {
+       .f_create = rte_table_hash_create_key16_ext,
+       .f_free = rte_table_hash_free_key16_ext,
+       .f_add = rte_table_hash_entry_add_key16_ext,
+       .f_delete = rte_table_hash_entry_delete_key16_ext,
+       .f_lookup = rte_table_hash_lookup_key16_ext_dosig, /* hashes the key */
+       .f_stats = rte_table_hash_key16_stats_read,
+};