- uint64_t extra_hits_mask = 0;
- uint64_t lookup_mask, miss_mask;
- unsigned idx;
- const void *key_store = h->key_store;
- int ret;
- hash_sig_t hash_vals[RTE_HASH_LOOKUP_BULK_MAX];
-
- unsigned idx00, idx01, idx10, idx11, idx20, idx21, idx30, idx31;
- const struct rte_hash_bucket *primary_bkt10, *primary_bkt11;
- const struct rte_hash_bucket *secondary_bkt10, *secondary_bkt11;
- const struct rte_hash_bucket *primary_bkt20, *primary_bkt21;
- const struct rte_hash_bucket *secondary_bkt20, *secondary_bkt21;
- const struct rte_hash_key *k_slot20, *k_slot21, *k_slot30, *k_slot31;
- hash_sig_t primary_hash10, primary_hash11;
- hash_sig_t secondary_hash10, secondary_hash11;
- hash_sig_t primary_hash20, primary_hash21;
- hash_sig_t secondary_hash20, secondary_hash21;
-
- lookup_mask = (uint64_t) -1 >> (64 - num_keys);
- miss_mask = lookup_mask;
-
- lookup_stage0(&idx00, &lookup_mask, keys);
- lookup_stage0(&idx01, &lookup_mask, keys);
-
- idx10 = idx00, idx11 = idx01;
-
- lookup_stage0(&idx00, &lookup_mask, keys);
- lookup_stage0(&idx01, &lookup_mask, keys);
- lookup_stage1(idx10, &primary_hash10, &secondary_hash10,
- &primary_bkt10, &secondary_bkt10, hash_vals, keys, h);
- lookup_stage1(idx11, &primary_hash11, &secondary_hash11,
- &primary_bkt11, &secondary_bkt11, hash_vals, keys, h);
-
- primary_bkt20 = primary_bkt10;
- primary_bkt21 = primary_bkt11;
- secondary_bkt20 = secondary_bkt10;
- secondary_bkt21 = secondary_bkt11;
- primary_hash20 = primary_hash10;
- primary_hash21 = primary_hash11;
- secondary_hash20 = secondary_hash10;
- secondary_hash21 = secondary_hash11;
- idx20 = idx10, idx21 = idx11;
- idx10 = idx00, idx11 = idx01;
-
- lookup_stage0(&idx00, &lookup_mask, keys);
- lookup_stage0(&idx01, &lookup_mask, keys);
- lookup_stage1(idx10, &primary_hash10, &secondary_hash10,
- &primary_bkt10, &secondary_bkt10, hash_vals, keys, h);
- lookup_stage1(idx11, &primary_hash11, &secondary_hash11,
- &primary_bkt11, &secondary_bkt11, hash_vals, keys, h);
- lookup_stage2(idx20, primary_hash20, secondary_hash20, primary_bkt20,
- secondary_bkt20, &k_slot20, positions, &extra_hits_mask,
- key_store, h);
- lookup_stage2(idx21, primary_hash21, secondary_hash21, primary_bkt21,
- secondary_bkt21, &k_slot21, positions, &extra_hits_mask,
- key_store, h);
-
- while (lookup_mask) {
- k_slot30 = k_slot20, k_slot31 = k_slot21;
- idx30 = idx20, idx31 = idx21;
- primary_bkt20 = primary_bkt10;
- primary_bkt21 = primary_bkt11;
- secondary_bkt20 = secondary_bkt10;
- secondary_bkt21 = secondary_bkt11;
- primary_hash20 = primary_hash10;
- primary_hash21 = primary_hash11;
- secondary_hash20 = secondary_hash10;
- secondary_hash21 = secondary_hash11;
- idx20 = idx10, idx21 = idx11;
- idx10 = idx00, idx11 = idx01;
-
- lookup_stage0(&idx00, &lookup_mask, keys);
- lookup_stage0(&idx01, &lookup_mask, keys);
- lookup_stage1(idx10, &primary_hash10, &secondary_hash10,
- &primary_bkt10, &secondary_bkt10, hash_vals, keys, h);
- lookup_stage1(idx11, &primary_hash11, &secondary_hash11,
- &primary_bkt11, &secondary_bkt11, hash_vals, keys, h);
- lookup_stage2(idx20, primary_hash20, secondary_hash20,
- primary_bkt20, secondary_bkt20, &k_slot20, positions,
- &extra_hits_mask, key_store, h);
- lookup_stage2(idx21, primary_hash21, secondary_hash21,
- primary_bkt21, secondary_bkt21, &k_slot21, positions,
- &extra_hits_mask, key_store, h);
- lookup_stage3(idx30, k_slot30, keys, positions, data, &hits, h);
- lookup_stage3(idx31, k_slot31, keys, positions, data, &hits, h);
- }
-
- k_slot30 = k_slot20, k_slot31 = k_slot21;
- idx30 = idx20, idx31 = idx21;
- primary_bkt20 = primary_bkt10;
- primary_bkt21 = primary_bkt11;
- secondary_bkt20 = secondary_bkt10;
- secondary_bkt21 = secondary_bkt11;
- primary_hash20 = primary_hash10;
- primary_hash21 = primary_hash11;
- secondary_hash20 = secondary_hash10;
- secondary_hash21 = secondary_hash11;
- idx20 = idx10, idx21 = idx11;
- idx10 = idx00, idx11 = idx01;
-
- lookup_stage1(idx10, &primary_hash10, &secondary_hash10,
- &primary_bkt10, &secondary_bkt10, hash_vals, keys, h);
- lookup_stage1(idx11, &primary_hash11, &secondary_hash11,
- &primary_bkt11, &secondary_bkt11, hash_vals, keys, h);
- lookup_stage2(idx20, primary_hash20, secondary_hash20, primary_bkt20,
- secondary_bkt20, &k_slot20, positions, &extra_hits_mask,
- key_store, h);
- lookup_stage2(idx21, primary_hash21, secondary_hash21, primary_bkt21,
- secondary_bkt21, &k_slot21, positions, &extra_hits_mask,
- key_store, h);
- lookup_stage3(idx30, k_slot30, keys, positions, data, &hits, h);
- lookup_stage3(idx31, k_slot31, keys, positions, data, &hits, h);
-
- k_slot30 = k_slot20, k_slot31 = k_slot21;
- idx30 = idx20, idx31 = idx21;
- primary_bkt20 = primary_bkt10;
- primary_bkt21 = primary_bkt11;
- secondary_bkt20 = secondary_bkt10;
- secondary_bkt21 = secondary_bkt11;
- primary_hash20 = primary_hash10;
- primary_hash21 = primary_hash11;
- secondary_hash20 = secondary_hash10;
- secondary_hash21 = secondary_hash11;
- idx20 = idx10, idx21 = idx11;
-
- lookup_stage2(idx20, primary_hash20, secondary_hash20, primary_bkt20,
- secondary_bkt20, &k_slot20, positions, &extra_hits_mask,
- key_store, h);
- lookup_stage2(idx21, primary_hash21, secondary_hash21, primary_bkt21,
- secondary_bkt21, &k_slot21, positions, &extra_hits_mask,
- key_store, h);
- lookup_stage3(idx30, k_slot30, keys, positions, data, &hits, h);
- lookup_stage3(idx31, k_slot31, keys, positions, data, &hits, h);
-
- k_slot30 = k_slot20, k_slot31 = k_slot21;
- idx30 = idx20, idx31 = idx21;
-
- lookup_stage3(idx30, k_slot30, keys, positions, data, &hits, h);
- lookup_stage3(idx31, k_slot31, keys, positions, data, &hits, h);
-
- /* ignore any items we have already found */
- extra_hits_mask &= ~hits;
-
- if (unlikely(extra_hits_mask)) {
- /* run a single search for each remaining item */
- do {
- idx = __builtin_ctzl(extra_hits_mask);
- if (data != NULL) {
- ret = rte_hash_lookup_with_hash_data(h,
- keys[idx], hash_vals[idx], &data[idx]);
- if (ret >= 0)
- hits |= 1ULL << idx;
- } else {
- positions[idx] = rte_hash_lookup_with_hash(h,
- keys[idx], hash_vals[idx]);
- if (positions[idx] >= 0)
- hits |= 1llu << idx;
+ int32_t i;
+ int32_t ret;
+ uint32_t prim_hash[RTE_HASH_LOOKUP_BULK_MAX];
+ uint32_t prim_index[RTE_HASH_LOOKUP_BULK_MAX];
+ uint32_t sec_index[RTE_HASH_LOOKUP_BULK_MAX];
+ uint16_t sig[RTE_HASH_LOOKUP_BULK_MAX];
+ const struct rte_hash_bucket *primary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
+ const struct rte_hash_bucket *secondary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
+ uint32_t prim_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};
+ uint32_t sec_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};
+ struct rte_hash_bucket *cur_bkt, *next_bkt;
+ void *pdata[RTE_HASH_LOOKUP_BULK_MAX];
+ uint32_t cnt_b, cnt_a;
+
+ /* Prefetch first keys */
+ for (i = 0; i < PREFETCH_OFFSET && i < num_keys; i++)
+ rte_prefetch0(keys[i]);
+
+ /*
+ * Prefetch rest of the keys, calculate primary and
+ * secondary bucket and prefetch them
+ */
+ for (i = 0; i < (num_keys - PREFETCH_OFFSET); i++) {
+ rte_prefetch0(keys[i + PREFETCH_OFFSET]);
+
+ prim_hash[i] = rte_hash_hash(h, keys[i]);
+
+ sig[i] = get_short_sig(prim_hash[i]);
+ prim_index[i] = get_prim_bucket_index(h, prim_hash[i]);
+ sec_index[i] = get_alt_bucket_index(h, prim_index[i], sig[i]);
+
+ primary_bkt[i] = &h->buckets[prim_index[i]];
+ secondary_bkt[i] = &h->buckets[sec_index[i]];
+
+ rte_prefetch0(primary_bkt[i]);
+ rte_prefetch0(secondary_bkt[i]);
+ }
+
+ /* Calculate and prefetch rest of the buckets */
+ for (; i < num_keys; i++) {
+ prim_hash[i] = rte_hash_hash(h, keys[i]);
+
+ sig[i] = get_short_sig(prim_hash[i]);
+ prim_index[i] = get_prim_bucket_index(h, prim_hash[i]);
+ sec_index[i] = get_alt_bucket_index(h, prim_index[i], sig[i]);
+
+ primary_bkt[i] = &h->buckets[prim_index[i]];
+ secondary_bkt[i] = &h->buckets[sec_index[i]];
+
+ rte_prefetch0(primary_bkt[i]);
+ rte_prefetch0(secondary_bkt[i]);
+ }
+
+ do {
+ /* Load the table change counter before the lookup
+ * starts. Acquire semantics will make sure that
+ * loads in compare_signatures are not hoisted.
+ */
+ cnt_b = __atomic_load_n(h->tbl_chng_cnt,
+ __ATOMIC_ACQUIRE);
+
+ /* Compare signatures and prefetch key slot of first hit */
+ for (i = 0; i < num_keys; i++) {
+ compare_signatures(&prim_hitmask[i], &sec_hitmask[i],
+ primary_bkt[i], secondary_bkt[i],
+ sig[i], h->sig_cmp_fn);
+
+ if (prim_hitmask[i]) {
+ uint32_t first_hit =
+ __builtin_ctzl(prim_hitmask[i])
+ >> 1;
+ uint32_t key_idx =
+ primary_bkt[i]->key_idx[first_hit];
+ const struct rte_hash_key *key_slot =
+ (const struct rte_hash_key *)(
+ (const char *)h->key_store +
+ key_idx * h->key_entry_size);
+ rte_prefetch0(key_slot);
+ continue;