[dpdk.git] lib/librte_hash/rte_cuckoo_hash.c
index 6af8ca4..1191dfd 100644
 #include "rte_hash.h"
 #include "rte_cuckoo_hash.h"
 
+/* Mask of all flags supported by this version */
+#define RTE_HASH_EXTRA_FLAGS_MASK (RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT | \
+                                  RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD | \
+                                  RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY | \
+                                  RTE_HASH_EXTRA_FLAGS_EXT_TABLE |     \
+                                  RTE_HASH_EXTRA_FLAGS_NO_FREE_ON_DEL | \
+                                  RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF)
+
 #define FOR_EACH_BUCKET(CURRENT_BKT, START_BUCKET)                            \
        for (CURRENT_BKT = START_BUCKET;                                      \
                CURRENT_BKT != NULL;                                          \
@@ -44,6 +52,11 @@ static struct rte_tailq_elem rte_hash_tailq = {
 };
 EAL_REGISTER_TAILQ(rte_hash_tailq)
 
+/* Entry queued on the RCU defer queue for each deleted key: the key
+ * slot to recycle and, when extendable buckets are in use, the index
+ * of the ext bucket to recycle along with it.
+ */
+struct __rte_hash_rcu_dq_entry {
+       uint32_t key_idx;
+       uint32_t ext_bkt_idx;
+};
+
 struct rte_hash *
 rte_hash_find_existing(const char *name)
 {
@@ -143,6 +156,7 @@ rte_hash_create(const struct rte_hash_parameters *params)
        unsigned int no_free_on_del = 0;
        uint32_t *ext_bkt_to_free = NULL;
        uint32_t *tbl_chng_cnt = NULL;
+       struct lcore_cache *local_free_slots = NULL;
        unsigned int readwrite_concur_lf_support = 0;
        uint32_t i;
 
@@ -164,6 +178,12 @@ rte_hash_create(const struct rte_hash_parameters *params)
                return NULL;
        }
 
+       if (params->extra_flag & ~RTE_HASH_EXTRA_FLAGS_MASK) {
+               rte_errno = EINVAL;
+               RTE_LOG(ERR, HASH, "rte_hash_create: unsupported extra flags\n");
+               return NULL;
+       }
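
As a minimal sketch of what this new guard accepts (the flag names are the real ones from rte_hash.h; the table name, sizing and helper function are illustrative), a caller combining only supported extra flags passes the mask check, while any bit outside RTE_HASH_EXTRA_FLAGS_MASK now fails early with rte_errno set to EINVAL:

#include <stdio.h>
#include <rte_hash.h>
#include <rte_jhash.h>
#include <rte_errno.h>

static struct rte_hash *
create_lf_table(void)
{
	struct rte_hash_parameters params = {
		.name = "example_lf",            /* illustrative */
		.entries = 1024,                 /* illustrative sizing */
		.key_len = sizeof(uint32_t),
		.hash_func = rte_jhash,
		.socket_id = 0,
		/* Every bit below is within RTE_HASH_EXTRA_FLAGS_MASK */
		.extra_flag = RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF |
			      RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD,
	};
	struct rte_hash *h = rte_hash_create(&params);

	/* An undefined bit, e.g. .extra_flag |= (1 << 31), would make
	 * rte_hash_create() return NULL with rte_errno == EINVAL.
	 */
	if (h == NULL)
		printf("create failed: rte_errno=%d\n", rte_errno);
	return h;
}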
+
        /* Validate correct usage of extra options */
        if ((params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY) &&
            (params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF)) {
@@ -195,7 +215,10 @@ rte_hash_create(const struct rte_hash_parameters *params)
 
        if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF) {
                readwrite_concur_lf_support = 1;
-               /* Enable not freeing internal memory/index on delete */
+               /* Do not free the internal memory/index on delete.
+                * If internal RCU is enabled, the internal memory/index
+                * is instead freed on delete through the RCU mechanism.
+                */
                no_free_on_del = 1;
        }
 
@@ -369,9 +392,13 @@ rte_hash_create(const struct rte_hash_parameters *params)
 #endif
 
        if (use_local_cache) {
-               h->local_free_slots = rte_zmalloc_socket(NULL,
+               local_free_slots = rte_zmalloc_socket(NULL,
                                sizeof(struct lcore_cache) * RTE_MAX_LCORE,
                                RTE_CACHE_LINE_SIZE, params->socket_id);
+               if (local_free_slots == NULL) {
+                       RTE_LOG(ERR, HASH, "local free slots memory allocation failed\n");
+                       goto err_unlock;
+               }
        }
 
        /* Default hash function */
@@ -402,6 +429,7 @@ rte_hash_create(const struct rte_hash_parameters *params)
        *h->tbl_chng_cnt = 0;
        h->hw_trans_mem_support = hw_trans_mem_support;
        h->use_local_cache = use_local_cache;
+       h->local_free_slots = local_free_slots;
        h->readwrite_concur_support = readwrite_concur_support;
        h->ext_table_support = ext_table_support;
        h->writer_takes_lock = writer_takes_lock;
@@ -447,6 +475,7 @@ err:
        rte_ring_free(r);
        rte_ring_free(r_ext);
        rte_free(te);
+       rte_free(local_free_slots);
        rte_free(h);
        rte_free(buckets);
        rte_free(buckets_ext);
@@ -484,6 +513,9 @@ rte_hash_free(struct rte_hash *h)
 
        rte_mcfg_tailq_write_unlock();
 
+       if (h->dq)
+               rte_rcu_qsbr_dq_delete(h->dq);
+
        if (h->use_local_cache)
                rte_free(h->local_free_slots);
        if (h->writer_takes_lock)
@@ -586,11 +618,20 @@ void
 rte_hash_reset(struct rte_hash *h)
 {
        uint32_t tot_ring_cnt, i;
+       unsigned int pending;
 
        if (h == NULL)
                return;
 
        __hash_rw_writer_lock(h);
+
+       if (h->dq) {
+               /* Reclaim all the resources */
+               rte_rcu_qsbr_dq_reclaim(h->dq, ~0, NULL, &pending, NULL);
+               if (pending != 0)
+                       RTE_LOG(ERR, HASH, "Failed to reclaim all RCU resources\n");
+       }
+
        memset(h->buckets, 0, h->num_buckets * sizeof(struct rte_hash_bucket));
        memset(h->key_store, 0, h->key_entry_size * (h->entries + 1));
        *h->tbl_chng_cnt = 0;
@@ -931,6 +972,38 @@ rte_hash_cuckoo_make_space_mw(const struct rte_hash *h,
        return -ENOSPC;
 }
 
+/* Allocate a free key slot: from the per-lcore cache when local caching
+ * is enabled, otherwise directly from the global free-slots ring.
+ * Returns EMPTY_SLOT when no slot is available.
+ */
+static inline uint32_t
+alloc_slot(const struct rte_hash *h, struct lcore_cache *cached_free_slots)
+{
+       unsigned int n_slots;
+       uint32_t slot_id;
+
+       if (h->use_local_cache) {
+               /* Try to get a free slot from the local cache */
+               if (cached_free_slots->len == 0) {
+                       /* Need to get another burst of free slots from global ring */
+                       n_slots = rte_ring_mc_dequeue_burst_elem(h->free_slots,
+                                       cached_free_slots->objs,
+                                       sizeof(uint32_t),
+                                       LCORE_CACHE_SIZE, NULL);
+                       if (n_slots == 0)
+                               return EMPTY_SLOT;
+
+                       cached_free_slots->len += n_slots;
+               }
+
+               /* Get a free slot from the local cache */
+               cached_free_slots->len--;
+               slot_id = cached_free_slots->objs[cached_free_slots->len];
+       } else {
+               if (rte_ring_sc_dequeue_elem(h->free_slots, &slot_id,
+                                               sizeof(uint32_t)) != 0)
+                       return EMPTY_SLOT;
+       }
+
+       return slot_id;
+}
+
 static inline int32_t
 __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
                                                hash_sig_t sig, void *data)
@@ -939,10 +1012,9 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
        uint32_t prim_bucket_idx, sec_bucket_idx;
        struct rte_hash_bucket *prim_bkt, *sec_bkt, *cur_bkt;
        struct rte_hash_key *new_k, *keys = h->key_store;
+       uint32_t ext_bkt_id = 0;
        uint32_t slot_id;
-       uint32_t ext_bkt_id;
        int ret;
-       unsigned n_slots;
        unsigned lcore_id;
        unsigned int i;
        struct lcore_cache *cached_free_slots = NULL;
@@ -980,28 +1052,20 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
        if (h->use_local_cache) {
                lcore_id = rte_lcore_id();
                cached_free_slots = &h->local_free_slots[lcore_id];
-               /* Try to get a free slot from the local cache */
-               if (cached_free_slots->len == 0) {
-                       /* Need to get another burst of free slots from global ring */
-                       n_slots = rte_ring_mc_dequeue_burst_elem(h->free_slots,
-                                       cached_free_slots->objs,
-                                       sizeof(uint32_t),
-                                       LCORE_CACHE_SIZE, NULL);
-                       if (n_slots == 0) {
-                               return -ENOSPC;
-                       }
-
-                       cached_free_slots->len += n_slots;
+       }
+       slot_id = alloc_slot(h, cached_free_slots);
+       if (slot_id == EMPTY_SLOT) {
+               if (h->dq) {
+                       __hash_rw_writer_lock(h);
+                       ret = rte_rcu_qsbr_dq_reclaim(h->dq,
+                                       h->hash_rcu_cfg->max_reclaim_size,
+                                       NULL, NULL, NULL);
+                       __hash_rw_writer_unlock(h);
+                       if (ret == 0)
+                               slot_id = alloc_slot(h, cached_free_slots);
                }
-
-               /* Get a free slot from the local cache */
-               cached_free_slots->len--;
-               slot_id = cached_free_slots->objs[cached_free_slots->len];
-       } else {
-               if (rte_ring_sc_dequeue_elem(h->free_slots, &slot_id,
-                                               sizeof(uint32_t)) != 0) {
+               if (slot_id == EMPTY_SLOT)
                        return -ENOSPC;
-               }
        }
 
        new_k = RTE_PTR_ADD(keys, slot_id * h->key_entry_size);
@@ -1095,9 +1159,21 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
         * extendable bucket. We first get a free bucket from ring.
         */
        if (rte_ring_sc_dequeue_elem(h->free_ext_bkts, &ext_bkt_id,
-                                               sizeof(uint32_t)) != 0) {
-               ret = -ENOSPC;
-               goto failure;
+                                               sizeof(uint32_t)) != 0 ||
+                                       ext_bkt_id == 0) {
+               if (h->dq) {
+                       if (rte_rcu_qsbr_dq_reclaim(h->dq,
+                                       h->hash_rcu_cfg->max_reclaim_size,
+                                       NULL, NULL, NULL) == 0) {
+                               rte_ring_sc_dequeue_elem(h->free_ext_bkts,
+                                                        &ext_bkt_id,
+                                                        sizeof(uint32_t));
+                       }
+               }
+               if (ext_bkt_id == 0) {
+                       ret = -ENOSPC;
+                       goto failure;
+               }
        }
 
        /* Use the first location of the new bucket */
@@ -1373,12 +1449,13 @@ rte_hash_lookup_data(const struct rte_hash *h, const void *key, void **data)
        return __rte_hash_lookup_with_hash(h, key, rte_hash_hash(h, key), data);
 }
 
-static inline void
-remove_entry(const struct rte_hash *h, struct rte_hash_bucket *bkt, unsigned i)
+static int
+free_slot(const struct rte_hash *h, uint32_t slot_id)
 {
        unsigned lcore_id, n_slots;
-       struct lcore_cache *cached_free_slots;
+       struct lcore_cache *cached_free_slots = NULL;
 
+       /* Return key indexes to free slot ring */
        if (h->use_local_cache) {
                lcore_id = rte_lcore_id();
                cached_free_slots = &h->local_free_slots[lcore_id];
@@ -1389,18 +1466,128 @@ remove_entry(const struct rte_hash *h, struct rte_hash_bucket *bkt, unsigned i)
                                                cached_free_slots->objs,
                                                sizeof(uint32_t),
                                                LCORE_CACHE_SIZE, NULL);
-                       ERR_IF_TRUE((n_slots == 0),
-                               "%s: could not enqueue free slots in global ring\n",
-                               __func__);
+                       RETURN_IF_TRUE((n_slots == 0), -EFAULT);
                        cached_free_slots->len -= n_slots;
                }
-               /* Put index of new free slot in cache. */
-               cached_free_slots->objs[cached_free_slots->len] =
-                                                       bkt->key_idx[i];
-               cached_free_slots->len++;
+       }
+
+       enqueue_slot_back(h, cached_free_slots, slot_id);
+       return 0;
+}
+
+/* RCU reclamation callback: hands the deleted entry's data back to the
+ * application, recycles the piggy-backed ext bucket (if any) and returns
+ * the key slot to the free list.
+ */
+static void
+__hash_rcu_qsbr_free_resource(void *p, void *e, unsigned int n)
+{
+       void *key_data = NULL;
+       int ret;
+       struct rte_hash_key *keys, *k;
+       struct rte_hash *h = (struct rte_hash *)p;
+       struct __rte_hash_rcu_dq_entry rcu_dq_entry =
+                       *((struct __rte_hash_rcu_dq_entry *)e);
+
+       RTE_SET_USED(n);
+       keys = h->key_store;
+
+       k = (struct rte_hash_key *) ((char *)keys +
+                               rcu_dq_entry.key_idx * h->key_entry_size);
+       key_data = k->pdata;
+       if (h->hash_rcu_cfg->free_key_data_func)
+               h->hash_rcu_cfg->free_key_data_func(h->hash_rcu_cfg->key_data_ptr,
+                                                   key_data);
+
+       if (h->ext_table_support && rcu_dq_entry.ext_bkt_idx != EMPTY_SLOT)
+               /* Recycle empty ext bkt to free list. */
+               rte_ring_sp_enqueue_elem(h->free_ext_bkts,
+                       &rcu_dq_entry.ext_bkt_idx, sizeof(uint32_t));
+
+       /* Return key indexes to free slot ring */
+       ret = free_slot(h, rcu_dq_entry.key_idx);
+       if (ret < 0) {
+               RTE_LOG(ERR, HASH,
+                       "%s: could not enqueue free slots in global ring\n",
+                               __func__);
+       }
+}
+
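For reference, a hedged sketch of the application-side counterpart invoked above: free_key_data_func has the rte_hash_free_key_data signature from rte_hash.h and receives the caller's key_data_ptr as its first argument. The struct and the bookkeeping below are purely illustrative:

#include <rte_malloc.h>

struct app_ctx {
	uint64_t live_entries;     /* illustrative bookkeeping */
};

/* Called only once no reader can still reference the deleted
 * entry's data, so freeing here is safe.
 */
static void
free_session(void *p, void *key_data)
{
	struct app_ctx *ctx = p;   /* this is cfg->key_data_ptr */

	ctx->live_entries--;
	rte_free(key_data);        /* data that was stored with the key */
}
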
+int
+rte_hash_rcu_qsbr_add(struct rte_hash *h, struct rte_hash_rcu_config *cfg)
+{
+       struct rte_rcu_qsbr_dq_parameters params = {0};
+       char rcu_dq_name[RTE_RCU_QSBR_DQ_NAMESIZE];
+       struct rte_hash_rcu_config *hash_rcu_cfg = NULL;
+
+       if (h == NULL || cfg == NULL || cfg->v == NULL) {
+               rte_errno = EINVAL;
+               return 1;
+       }
+
+       const uint32_t total_entries = h->use_local_cache ?
+               h->entries + (RTE_MAX_LCORE - 1) * (LCORE_CACHE_SIZE - 1) + 1
+                                                       : h->entries + 1;
+
+       if (h->hash_rcu_cfg) {
+               rte_errno = EEXIST;
+               return 1;
+       }
+
+       hash_rcu_cfg = rte_zmalloc(NULL, sizeof(struct rte_hash_rcu_config), 0);
+       if (hash_rcu_cfg == NULL) {
+               RTE_LOG(ERR, HASH, "memory allocation for RCU config failed\n");
+               return 1;
+       }
+
+       if (cfg->mode == RTE_HASH_QSBR_MODE_SYNC) {
+               /* Nothing else to do. */
+       } else if (cfg->mode == RTE_HASH_QSBR_MODE_DQ) {
+               /* Init QSBR defer queue. */
+               snprintf(rcu_dq_name, sizeof(rcu_dq_name),
+                                       "HASH_RCU_%s", h->name);
+               params.name = rcu_dq_name;
+               params.size = cfg->dq_size;
+               if (params.size == 0)
+                       params.size = total_entries;
+               params.trigger_reclaim_limit = cfg->trigger_reclaim_limit;
+               params.max_reclaim_size = cfg->max_reclaim_size;
+               if (params.max_reclaim_size == 0)
+                       params.max_reclaim_size = RTE_HASH_RCU_DQ_RECLAIM_MAX;
+               params.esize = sizeof(struct __rte_hash_rcu_dq_entry);
+               params.free_fn = __hash_rcu_qsbr_free_resource;
+               params.p = h;
+               params.v = cfg->v;
+               h->dq = rte_rcu_qsbr_dq_create(&params);
+               if (h->dq == NULL) {
+                       rte_free(hash_rcu_cfg);
+                       RTE_LOG(ERR, HASH, "HASH defer queue creation failed\n");
+                       return 1;
+               }
        } else {
-               rte_ring_sp_enqueue_elem(h->free_slots,
-                               &bkt->key_idx[i], sizeof(uint32_t));
+               rte_free(hash_rcu_cfg);
+               rte_errno = EINVAL;
+               return 1;
+       }
+
+       hash_rcu_cfg->v = cfg->v;
+       hash_rcu_cfg->mode = cfg->mode;
+       hash_rcu_cfg->dq_size = params.size;
+       hash_rcu_cfg->trigger_reclaim_limit = params.trigger_reclaim_limit;
+       hash_rcu_cfg->max_reclaim_size = params.max_reclaim_size;
+       hash_rcu_cfg->free_key_data_func = cfg->free_key_data_func;
+       hash_rcu_cfg->key_data_ptr = cfg->key_data_ptr;
+
+       h->hash_rcu_cfg = hash_rcu_cfg;
+
+       return 0;
+}
+
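A minimal usage sketch for the new API (MAX_READERS and attach_rcu are illustrative names, and free_session/app_ctx come from the sketch above; the rte_rcu_qsbr_* calls are the real librte_rcu API):

#include <rte_hash.h>
#include <rte_rcu_qsbr.h>
#include <rte_malloc.h>

#define MAX_READERS 4      /* illustrative reader-thread count */

static int
attach_rcu(struct rte_hash *h, struct app_ctx *ctx)
{
	size_t sz = rte_rcu_qsbr_get_memsize(MAX_READERS);
	struct rte_rcu_qsbr *v = rte_zmalloc(NULL, sz, RTE_CACHE_LINE_SIZE);

	if (v == NULL || rte_rcu_qsbr_init(v, MAX_READERS) != 0)
		return -1;

	struct rte_hash_rcu_config cfg = {
		.v = v,
		.mode = RTE_HASH_QSBR_MODE_DQ,  /* reclaim via defer queue */
		.dq_size = 0,                   /* 0: defaults to total_entries */
		.free_key_data_func = free_session,
		.key_data_ptr = ctx,
	};

	/* Register once, before any delete; a second call on the same
	 * table fails with rte_errno == EEXIST.
	 */
	return rte_hash_rcu_qsbr_add(h, &cfg) == 0 ? 0 : -1;
}
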
+static inline void
+remove_entry(const struct rte_hash *h, struct rte_hash_bucket *bkt,
+               unsigned int i)
+{
+       int ret = free_slot(h, bkt->key_idx[i]);
+
+       if (ret < 0) {
+               RTE_LOG(ERR, HASH,
+                       "%s: could not enqueue free slots in global ring\n",
+                               __func__);
        }
 }
 
@@ -1499,6 +1686,8 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
        int pos;
        int32_t ret, i;
        uint16_t short_sig;
+       uint32_t index = EMPTY_SLOT;
+       struct __rte_hash_rcu_dq_entry rcu_dq_entry;
 
        short_sig = get_short_sig(sig);
        prim_bucket_idx = get_prim_bucket_index(h, sig);
@@ -1533,10 +1722,9 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
 
 /* Search last bucket to see if empty to be recycled */
 return_bkt:
-       if (!last_bkt) {
-               __hash_rw_writer_unlock(h);
-               return ret;
-       }
+       if (!last_bkt)
+               goto return_key;
+
        while (last_bkt->next) {
                prev_bkt = last_bkt;
                last_bkt = last_bkt->next;
@@ -1549,11 +1737,11 @@ return_bkt:
        /* found empty bucket and recycle */
        if (i == RTE_HASH_BUCKET_ENTRIES) {
                prev_bkt->next = NULL;
-               uint32_t index = last_bkt - h->buckets_ext + 1;
+               index = last_bkt - h->buckets_ext + 1;
                /* Recycle the empty bkt if
                 * no_free_on_del is disabled.
                 */
-               if (h->no_free_on_del)
+               if (h->no_free_on_del) {
                        /* Store index of an empty ext bkt to be recycled
                         * on calling rte_hash_del_xxx APIs.
                         * When lock free read-write concurrency is enabled,
@@ -1561,12 +1749,34 @@ return_bkt:
                         * immediately (as readers might be using it still).
                         * Hence freeing of the ext bkt is piggy-backed to
                         * freeing of the key index.
+                        * If using external RCU, store this index in an array.
                         */
-                       h->ext_bkt_to_free[ret] = index;
-               else
+                       if (h->hash_rcu_cfg == NULL)
+                               h->ext_bkt_to_free[ret] = index;
+               } else
                        rte_ring_sp_enqueue_elem(h->free_ext_bkts, &index,
                                                        sizeof(uint32_t));
        }
+
+return_key:
+       /* Using internal RCU QSBR */
+       if (h->hash_rcu_cfg) {
+               /* Slot index where the key is stored; offset by 1 to
+                * account for the dummy slot at index 0.
+                */
+               rcu_dq_entry.key_idx = ret + 1;
+               rcu_dq_entry.ext_bkt_idx = index;
+               if (h->dq == NULL) {
+                       /* Wait for quiescent state change if using
+                        * RTE_HASH_QSBR_MODE_SYNC
+                        */
+                       rte_rcu_qsbr_synchronize(h->hash_rcu_cfg->v,
+                                                RTE_QSBR_THRID_INVALID);
+                       __hash_rcu_qsbr_free_resource((void *)((uintptr_t)h),
+                                                     &rcu_dq_entry, 1);
+               } else {
+                       /* Push into the QSBR FIFO if using
+                        * RTE_HASH_QSBR_MODE_DQ
+                        */
+                       if (rte_rcu_qsbr_dq_enqueue(h->dq, &rcu_dq_entry) != 0)
+                               RTE_LOG(ERR, HASH,
+                                       "Failed to push to the QSBR FIFO\n");
+               }
+       }
        __hash_rw_writer_unlock(h);
        return ret;
 }
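
The deferred free above completes only after every registered reader has passed through a quiescent state, so each reader must report quiescence periodically. A hedged reader-side sketch, assuming MAX_READERS from the registration sketch above (g_hash, keep_running, use() and the thread-id scheme are illustrative; the rte_rcu_qsbr_* calls are the real API):

#include <stdbool.h>
#include <rte_hash.h>
#include <rte_lcore.h>
#include <rte_rcu_qsbr.h>

static struct rte_hash *g_hash;            /* table with RCU attached */
static volatile bool keep_running = true;  /* illustrative stop flag */

static void use(void *data) { (void)data; /* illustrative consumer */ }

static int
reader_loop(void *arg)
{
	struct rte_rcu_qsbr *v = arg;      /* same QSBR variable as cfg->v */
	unsigned int tid = rte_lcore_id() % MAX_READERS; /* illustrative */
	uint32_t key = 42;                 /* illustrative key */
	void *data;

	rte_rcu_qsbr_thread_register(v, tid);
	rte_rcu_qsbr_thread_online(v, tid);

	while (keep_running) {
		if (rte_hash_lookup_data(g_hash, &key, &data) >= 0)
			use(data);         /* safe: not reclaimed yet */
		/* Report quiescence so deferred frees can proceed */
		rte_rcu_qsbr_quiescent(v, tid);
	}

	rte_rcu_qsbr_thread_offline(v, tid);
	rte_rcu_qsbr_thread_unregister(v, tid);
	return 0;
}
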
@@ -1615,8 +1825,6 @@ rte_hash_free_key_with_position(const struct rte_hash *h,
 
        RETURN_IF_TRUE(((h == NULL) || (key_idx == EMPTY_SLOT)), -EINVAL);
 
-       unsigned int lcore_id, n_slots;
-       struct lcore_cache *cached_free_slots;
        const uint32_t total_entries = h->use_local_cache ?
                h->entries + (RTE_MAX_LCORE - 1) * (LCORE_CACHE_SIZE - 1) + 1
                                                        : h->entries + 1;
@@ -1634,28 +1842,9 @@ rte_hash_free_key_with_position(const struct rte_hash *h,
                }
        }
 
-       if (h->use_local_cache) {
-               lcore_id = rte_lcore_id();
-               cached_free_slots = &h->local_free_slots[lcore_id];
-               /* Cache full, need to free it. */
-               if (cached_free_slots->len == LCORE_CACHE_SIZE) {
-                       /* Need to enqueue the free slots in global ring. */
-                       n_slots = rte_ring_mp_enqueue_burst_elem(h->free_slots,
-                                               cached_free_slots->objs,
-                                               sizeof(uint32_t),
-                                               LCORE_CACHE_SIZE, NULL);
-                       RETURN_IF_TRUE((n_slots == 0), -EFAULT);
-                       cached_free_slots->len -= n_slots;
-               }
-               /* Put index of new free slot in cache. */
-               cached_free_slots->objs[cached_free_slots->len] = key_idx;
-               cached_free_slots->len++;
-       } else {
-               rte_ring_sp_enqueue_elem(h->free_slots, &key_idx,
-                                               sizeof(uint32_t));
-       }
+       /* Enqueue slot to cache/ring of free slots. */
+       return free_slot(h, key_idx);
 
-       return 0;
 }
 
 static inline void
@@ -1669,7 +1858,7 @@ compare_signatures(uint32_t *prim_hash_matches, uint32_t *sec_hash_matches,
 
        /* For match mask the first bit of every two bits indicates the match */
        switch (sig_cmp_fn) {
-#if defined(RTE_MACHINE_CPUFLAG_SSE2)
+#if defined(__SSE2__)
        case RTE_HASH_COMPARE_SSE:
                /* Compare all signatures in the bucket */
                *prim_hash_matches = _mm_movemask_epi8(_mm_cmpeq_epi16(
@@ -1682,7 +1871,7 @@ compare_signatures(uint32_t *prim_hash_matches, uint32_t *sec_hash_matches,
                                        (__m128i const *)sec_bkt->sig_current),
                                _mm_set1_epi16(sig)));
                break;
-#elif defined(RTE_MACHINE_CPUFLAG_NEON)
+#elif defined(__ARM_NEON)
        case RTE_HASH_COMPARE_NEON: {
                uint16x8_t vmat, vsig, x;
                int16x8_t shift = {-15, -13, -11, -9, -7, -5, -3, -1};
@@ -1711,64 +1900,20 @@ compare_signatures(uint32_t *prim_hash_matches, uint32_t *sec_hash_matches,
        }
 }
 
-#define PREFETCH_OFFSET 4
 static inline void
-__rte_hash_lookup_bulk_l(const struct rte_hash *h, const void **keys,
-                       int32_t num_keys, int32_t *positions,
-                       uint64_t *hit_mask, void *data[])
+__bulk_lookup_l(const struct rte_hash *h, const void **keys,
+               const struct rte_hash_bucket **primary_bkt,
+               const struct rte_hash_bucket **secondary_bkt,
+               uint16_t *sig, int32_t num_keys, int32_t *positions,
+               uint64_t *hit_mask, void *data[])
 {
        uint64_t hits = 0;
        int32_t i;
        int32_t ret;
-       uint32_t prim_hash[RTE_HASH_LOOKUP_BULK_MAX];
-       uint32_t prim_index[RTE_HASH_LOOKUP_BULK_MAX];
-       uint32_t sec_index[RTE_HASH_LOOKUP_BULK_MAX];
-       uint16_t sig[RTE_HASH_LOOKUP_BULK_MAX];
-       const struct rte_hash_bucket *primary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
-       const struct rte_hash_bucket *secondary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
        uint32_t prim_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};
        uint32_t sec_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};
        struct rte_hash_bucket *cur_bkt, *next_bkt;
 
-       /* Prefetch first keys */
-       for (i = 0; i < PREFETCH_OFFSET && i < num_keys; i++)
-               rte_prefetch0(keys[i]);
-
-       /*
-        * Prefetch rest of the keys, calculate primary and
-        * secondary bucket and prefetch them
-        */
-       for (i = 0; i < (num_keys - PREFETCH_OFFSET); i++) {
-               rte_prefetch0(keys[i + PREFETCH_OFFSET]);
-
-               prim_hash[i] = rte_hash_hash(h, keys[i]);
-
-               sig[i] = get_short_sig(prim_hash[i]);
-               prim_index[i] = get_prim_bucket_index(h, prim_hash[i]);
-               sec_index[i] = get_alt_bucket_index(h, prim_index[i], sig[i]);
-
-               primary_bkt[i] = &h->buckets[prim_index[i]];
-               secondary_bkt[i] = &h->buckets[sec_index[i]];
-
-               rte_prefetch0(primary_bkt[i]);
-               rte_prefetch0(secondary_bkt[i]);
-       }
-
-       /* Calculate and prefetch rest of the buckets */
-       for (; i < num_keys; i++) {
-               prim_hash[i] = rte_hash_hash(h, keys[i]);
-
-               sig[i] = get_short_sig(prim_hash[i]);
-               prim_index[i] = get_prim_bucket_index(h, prim_hash[i]);
-               sec_index[i] = get_alt_bucket_index(h, prim_index[i], sig[i]);
-
-               primary_bkt[i] = &h->buckets[prim_index[i]];
-               secondary_bkt[i] = &h->buckets[sec_index[i]];
-
-               rte_prefetch0(primary_bkt[i]);
-               rte_prefetch0(secondary_bkt[i]);
-       }
-
        __hash_rw_reader_lock(h);
 
        /* Compare signatures and prefetch key slot of first hit */
@@ -1903,63 +2048,20 @@ next_key:
 }
 
 static inline void
-__rte_hash_lookup_bulk_lf(const struct rte_hash *h, const void **keys,
-                       int32_t num_keys, int32_t *positions,
-                       uint64_t *hit_mask, void *data[])
+__bulk_lookup_lf(const struct rte_hash *h, const void **keys,
+               const struct rte_hash_bucket **primary_bkt,
+               const struct rte_hash_bucket **secondary_bkt,
+               uint16_t *sig, int32_t num_keys, int32_t *positions,
+               uint64_t *hit_mask, void *data[])
 {
        uint64_t hits = 0;
        int32_t i;
        int32_t ret;
-       uint32_t prim_hash[RTE_HASH_LOOKUP_BULK_MAX];
-       uint32_t prim_index[RTE_HASH_LOOKUP_BULK_MAX];
-       uint32_t sec_index[RTE_HASH_LOOKUP_BULK_MAX];
-       uint16_t sig[RTE_HASH_LOOKUP_BULK_MAX];
-       const struct rte_hash_bucket *primary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
-       const struct rte_hash_bucket *secondary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
        uint32_t prim_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};
        uint32_t sec_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};
        struct rte_hash_bucket *cur_bkt, *next_bkt;
        uint32_t cnt_b, cnt_a;
 
-       /* Prefetch first keys */
-       for (i = 0; i < PREFETCH_OFFSET && i < num_keys; i++)
-               rte_prefetch0(keys[i]);
-
-       /*
-        * Prefetch rest of the keys, calculate primary and
-        * secondary bucket and prefetch them
-        */
-       for (i = 0; i < (num_keys - PREFETCH_OFFSET); i++) {
-               rte_prefetch0(keys[i + PREFETCH_OFFSET]);
-
-               prim_hash[i] = rte_hash_hash(h, keys[i]);
-
-               sig[i] = get_short_sig(prim_hash[i]);
-               prim_index[i] = get_prim_bucket_index(h, prim_hash[i]);
-               sec_index[i] = get_alt_bucket_index(h, prim_index[i], sig[i]);
-
-               primary_bkt[i] = &h->buckets[prim_index[i]];
-               secondary_bkt[i] = &h->buckets[sec_index[i]];
-
-               rte_prefetch0(primary_bkt[i]);
-               rte_prefetch0(secondary_bkt[i]);
-       }
-
-       /* Calculate and prefetch rest of the buckets */
-       for (; i < num_keys; i++) {
-               prim_hash[i] = rte_hash_hash(h, keys[i]);
-
-               sig[i] = get_short_sig(prim_hash[i]);
-               prim_index[i] = get_prim_bucket_index(h, prim_hash[i]);
-               sec_index[i] = get_alt_bucket_index(h, prim_index[i], sig[i]);
-
-               primary_bkt[i] = &h->buckets[prim_index[i]];
-               secondary_bkt[i] = &h->buckets[sec_index[i]];
-
-               rte_prefetch0(primary_bkt[i]);
-               rte_prefetch0(secondary_bkt[i]);
-       }
-
        for (i = 0; i < num_keys; i++)
                positions[i] = -ENOENT;
 
@@ -2124,6 +2226,92 @@ next_key:
                *hit_mask = hits;
 }
 
+#define PREFETCH_OFFSET 4
+static inline void
+__bulk_lookup_prefetching_loop(const struct rte_hash *h,
+       const void **keys, int32_t num_keys,
+       uint16_t *sig,
+       const struct rte_hash_bucket **primary_bkt,
+       const struct rte_hash_bucket **secondary_bkt)
+{
+       int32_t i;
+       uint32_t prim_hash[RTE_HASH_LOOKUP_BULK_MAX];
+       uint32_t prim_index[RTE_HASH_LOOKUP_BULK_MAX];
+       uint32_t sec_index[RTE_HASH_LOOKUP_BULK_MAX];
+
+       /* Prefetch first keys */
+       for (i = 0; i < PREFETCH_OFFSET && i < num_keys; i++)
+               rte_prefetch0(keys[i]);
+
+       /*
+        * Prefetch rest of the keys, calculate primary and
+        * secondary bucket and prefetch them
+        */
+       for (i = 0; i < (num_keys - PREFETCH_OFFSET); i++) {
+               rte_prefetch0(keys[i + PREFETCH_OFFSET]);
+
+               prim_hash[i] = rte_hash_hash(h, keys[i]);
+
+               sig[i] = get_short_sig(prim_hash[i]);
+               prim_index[i] = get_prim_bucket_index(h, prim_hash[i]);
+               sec_index[i] = get_alt_bucket_index(h, prim_index[i], sig[i]);
+
+               primary_bkt[i] = &h->buckets[prim_index[i]];
+               secondary_bkt[i] = &h->buckets[sec_index[i]];
+
+               rte_prefetch0(primary_bkt[i]);
+               rte_prefetch0(secondary_bkt[i]);
+       }
+
+       /* Calculate and prefetch rest of the buckets */
+       for (; i < num_keys; i++) {
+               prim_hash[i] = rte_hash_hash(h, keys[i]);
+
+               sig[i] = get_short_sig(prim_hash[i]);
+               prim_index[i] = get_prim_bucket_index(h, prim_hash[i]);
+               sec_index[i] = get_alt_bucket_index(h, prim_index[i], sig[i]);
+
+               primary_bkt[i] = &h->buckets[prim_index[i]];
+               secondary_bkt[i] = &h->buckets[sec_index[i]];
+
+               rte_prefetch0(primary_bkt[i]);
+               rte_prefetch0(secondary_bkt[i]);
+       }
+}
+
+static inline void
+__rte_hash_lookup_bulk_l(const struct rte_hash *h, const void **keys,
+                       int32_t num_keys, int32_t *positions,
+                       uint64_t *hit_mask, void *data[])
+{
+       uint16_t sig[RTE_HASH_LOOKUP_BULK_MAX];
+       const struct rte_hash_bucket *primary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
+       const struct rte_hash_bucket *secondary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
+
+       __bulk_lookup_prefetching_loop(h, keys, num_keys, sig,
+               primary_bkt, secondary_bkt);
+
+       __bulk_lookup_l(h, keys, primary_bkt, secondary_bkt, sig, num_keys,
+               positions, hit_mask, data);
+}
+
+static inline void
+__rte_hash_lookup_bulk_lf(const struct rte_hash *h, const void **keys,
+                       int32_t num_keys, int32_t *positions,
+                       uint64_t *hit_mask, void *data[])
+{
+       uint16_t sig[RTE_HASH_LOOKUP_BULK_MAX];
+       const struct rte_hash_bucket *primary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
+       const struct rte_hash_bucket *secondary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
+
+       __bulk_lookup_prefetching_loop(h, keys, num_keys, sig,
+               primary_bkt, secondary_bkt);
+
+       __bulk_lookup_lf(h, keys, primary_bkt, secondary_bkt, sig, num_keys,
+               positions, hit_mask, data);
+}
+
 static inline void
 __rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
                        int32_t num_keys, int32_t *positions,
@@ -2165,6 +2353,123 @@ rte_hash_lookup_bulk_data(const struct rte_hash *h, const void **keys,
        return __builtin_popcountl(*hit_mask);
 }
 
+
+static inline void
+__rte_hash_lookup_with_hash_bulk_l(const struct rte_hash *h,
+                       const void **keys, hash_sig_t *prim_hash,
+                       int32_t num_keys, int32_t *positions,
+                       uint64_t *hit_mask, void *data[])
+{
+       int32_t i;
+       uint32_t prim_index[RTE_HASH_LOOKUP_BULK_MAX];
+       uint32_t sec_index[RTE_HASH_LOOKUP_BULK_MAX];
+       uint16_t sig[RTE_HASH_LOOKUP_BULK_MAX];
+       const struct rte_hash_bucket *primary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
+       const struct rte_hash_bucket *secondary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
+
+       /*
+        * Prefetch keys, calculate primary and
+        * secondary bucket and prefetch them
+        */
+       for (i = 0; i < num_keys; i++) {
+               rte_prefetch0(keys[i]);
+
+               sig[i] = get_short_sig(prim_hash[i]);
+               prim_index[i] = get_prim_bucket_index(h, prim_hash[i]);
+               sec_index[i] = get_alt_bucket_index(h, prim_index[i], sig[i]);
+
+               primary_bkt[i] = &h->buckets[prim_index[i]];
+               secondary_bkt[i] = &h->buckets[sec_index[i]];
+
+               rte_prefetch0(primary_bkt[i]);
+               rte_prefetch0(secondary_bkt[i]);
+       }
+
+       __bulk_lookup_l(h, keys, primary_bkt, secondary_bkt, sig, num_keys,
+               positions, hit_mask, data);
+}
+
+static inline void
+__rte_hash_lookup_with_hash_bulk_lf(const struct rte_hash *h,
+                       const void **keys, hash_sig_t *prim_hash,
+                       int32_t num_keys, int32_t *positions,
+                       uint64_t *hit_mask, void *data[])
+{
+       int32_t i;
+       uint32_t prim_index[RTE_HASH_LOOKUP_BULK_MAX];
+       uint32_t sec_index[RTE_HASH_LOOKUP_BULK_MAX];
+       uint16_t sig[RTE_HASH_LOOKUP_BULK_MAX];
+       const struct rte_hash_bucket *primary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
+       const struct rte_hash_bucket *secondary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
+
+       /*
+        * Prefetch keys, calculate primary and
+        * secondary bucket and prefetch them
+        */
+       for (i = 0; i < num_keys; i++) {
+               rte_prefetch0(keys[i]);
+
+               sig[i] = get_short_sig(prim_hash[i]);
+               prim_index[i] = get_prim_bucket_index(h, prim_hash[i]);
+               sec_index[i] = get_alt_bucket_index(h, prim_index[i], sig[i]);
+
+               primary_bkt[i] = &h->buckets[prim_index[i]];
+               secondary_bkt[i] = &h->buckets[sec_index[i]];
+
+               rte_prefetch0(primary_bkt[i]);
+               rte_prefetch0(secondary_bkt[i]);
+       }
+
+       __bulk_lookup_lf(h, keys, primary_bkt, secondary_bkt, sig, num_keys,
+               positions, hit_mask, data);
+}
+
+static inline void
+__rte_hash_lookup_with_hash_bulk(const struct rte_hash *h, const void **keys,
+                       hash_sig_t *prim_hash, int32_t num_keys,
+                       int32_t *positions, uint64_t *hit_mask, void *data[])
+{
+       if (h->readwrite_concur_lf_support)
+               __rte_hash_lookup_with_hash_bulk_lf(h, keys, prim_hash,
+                               num_keys, positions, hit_mask, data);
+       else
+               __rte_hash_lookup_with_hash_bulk_l(h, keys, prim_hash,
+                               num_keys, positions, hit_mask, data);
+}
+
+int
+rte_hash_lookup_with_hash_bulk(const struct rte_hash *h, const void **keys,
+               hash_sig_t *sig, uint32_t num_keys, int32_t *positions)
+{
+       RETURN_IF_TRUE(((h == NULL) || (keys == NULL) ||
+                       (sig == NULL) || (num_keys == 0) ||
+                       (num_keys > RTE_HASH_LOOKUP_BULK_MAX) ||
+                       (positions == NULL)), -EINVAL);
+
+       __rte_hash_lookup_with_hash_bulk(h, keys, sig, num_keys,
+               positions, NULL, NULL);
+       return 0;
+}
+
+int
+rte_hash_lookup_with_hash_bulk_data(const struct rte_hash *h,
+               const void **keys, hash_sig_t *sig,
+               uint32_t num_keys, uint64_t *hit_mask, void *data[])
+{
+       RETURN_IF_TRUE(((h == NULL) || (keys == NULL) ||
+                       (sig == NULL) || (num_keys == 0) ||
+                       (num_keys > RTE_HASH_LOOKUP_BULK_MAX) ||
+                       (hit_mask == NULL)), -EINVAL);
+
+       int32_t positions[num_keys];
+
+       __rte_hash_lookup_with_hash_bulk(h, keys, sig, num_keys,
+                       positions, hit_mask, data);
+
+       /* Return number of hits */
+       return __builtin_popcountl(*hit_mask);
+}
+
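A short usage sketch for the new precomputed-hash bulk API (BURST, lookup_burst and the printf are illustrative; in a real pipeline the signatures would typically be computed in an earlier stage or on another core, which is the point of passing them in):

#include <stdio.h>
#include <rte_hash.h>

#define BURST 8    /* illustrative; must not exceed RTE_HASH_LOOKUP_BULK_MAX */

static void
lookup_burst(const struct rte_hash *h, const void *keys[BURST])
{
	hash_sig_t sigs[BURST];
	void *data[BURST];
	uint64_t hit_mask = 0;
	int i;

	/* Precompute the signatures once... */
	for (i = 0; i < BURST; i++)
		sigs[i] = rte_hash_hash(h, keys[i]);

	/* ...then look all keys up without rehashing them */
	if (rte_hash_lookup_with_hash_bulk_data(h, keys, sigs, BURST,
						&hit_mask, data) > 0)
		for (i = 0; i < BURST; i++)
			if (hit_mask & (1ULL << i))
				printf("key %d -> %p\n", i, data[i]);
}
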
 int32_t
 rte_hash_iterate(const struct rte_hash *h, const void **key, void **data, uint32_t *next)
 {