/*-
 * Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in
 *     the documentation and/or other materials provided with the
 *     distribution.
 *   * Neither the name of Intel Corporation nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef _IPV4_FRAG_TBL_H_
#define _IPV4_FRAG_TBL_H_

/**
 * @file
 * IPv4 fragments table.
 *
 * Implementation of IPv4 fragment table create/destroy/find/update.
 */

/*
 * The ipv4_frag_tbl is a simple hash table:
 * The basic idea is to use two hash functions and <bucket_entries>
 * associativity. This provides 2 * <bucket_entries> possible locations in
 * the hash table for each key. It is a sort of simplified Cuckoo hashing:
 * when a collision occurs and all 2 * <bucket_entries> locations are
 * occupied, instead of reinserting existing keys into alternative
 * locations, we just return a failure.
 * (See the illustrative sketch after ipv4_frag_hash() below.)
 * Another aspect is timing: entries that reside in the table longer than
 * <max_cycles> are considered invalid, and can be removed/replaced by
 * new ones.
 * Each <key, data> pair is stored together; all add/update/lookup
 * operations are not MT safe.
 */

#include <stdio.h>
#include <inttypes.h>
#include <sys/queue.h>

#include <rte_memory.h>
#include <rte_malloc.h>
#include <rte_log.h>
#include <rte_jhash.h>
#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
#include <rte_hash_crc.h>
#endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */

/*
 * struct ipv4_frag_pkt, struct ipv4_frag_key, struct ipv4_frag_death_row
 * and the IPV4_FRAG_* / ipv4_frag_free() / ipv4_frag_reset() helpers used
 * below are expected to be defined by the file that includes this header.
 */

#define	PRIME_VALUE	0xeaad8405

TAILQ_HEAD(ipv4_pkt_list, ipv4_frag_pkt);

struct ipv4_frag_tbl_stat {
	uint64_t find_num;      /* total # of find/insert attempts. */
	uint64_t add_num;       /* # of add ops. */
	uint64_t del_num;       /* # of del ops. */
	uint64_t reuse_num;     /* # of reuse (del/add) ops. */
	uint64_t fail_total;    /* total # of add failures. */
	uint64_t fail_nospace;  /* # of 'no space' add failures. */
} __rte_cache_aligned;

struct ipv4_frag_tbl {
	uint64_t max_cycles;     /* ttl for table entries. */
	uint32_t entry_mask;     /* hash value mask. */
	uint32_t max_entries;    /* max entries allowed. */
	uint32_t use_entries;    /* entries in use. */
	uint32_t bucket_entries; /* hash associativity. */
	uint32_t nb_entries;     /* total size of the table. */
	uint32_t nb_buckets;     /* num of associativity lines. */
	struct ipv4_frag_pkt *last;     /* last used entry. */
	struct ipv4_pkt_list lru;       /* LRU list for table entries. */
	struct ipv4_frag_tbl_stat stat; /* statistics counters. */
	struct ipv4_frag_pkt pkt[0];    /* hash table. */
};

#define	IPV4_FRAG_TBL_POS(tbl, sig)	\
	((tbl)->pkt + ((sig) & (tbl)->entry_mask))

#define	IPV4_FRAG_HASH_FNUM	2

#ifdef IPV4_FRAG_TBL_STAT
#define	IPV4_FRAG_TBL_STAT_UPDATE(s, f, v)	((s)->f += (v))
#else
#define	IPV4_FRAG_TBL_STAT_UPDATE(s, f, v)	do {} while (0)
#endif /* IPV4_FRAG_TBL_STAT */

static inline void
ipv4_frag_hash(const struct ipv4_frag_key *key, uint32_t *v1, uint32_t *v2)
{
	uint32_t v;
	const uint32_t *p;

	p = (const uint32_t *)&key->src_dst;

#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
	v = rte_hash_crc_4byte(p[0], PRIME_VALUE);
	v = rte_hash_crc_4byte(p[1], v);
	v = rte_hash_crc_4byte(key->id, v);
#else
	v = rte_jhash_3words(p[0], p[1], key->id, PRIME_VALUE);
#endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */

	/* derive two table signatures from the single 32-bit hash value. */
	*v1 = v;
	*v2 = (v << 7) + (v >> 14);
}
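
/*
 * Illustrative sketch, not part of the original code: shows how one key
 * maps to its 2 * <bucket_entries> candidate slots, as described in the
 * design notes at the top of the file. The example_* name is hypothetical.
 */
static inline void
example_candidate_slots(struct ipv4_frag_tbl *tbl,
	const struct ipv4_frag_key *key)
{
	uint32_t sig1, sig2;
	struct ipv4_frag_pkt *b1, *b2;

	/* two signatures from one hash value select two buckets ... */
	ipv4_frag_hash(key, &sig1, &sig2);
	b1 = IPV4_FRAG_TBL_POS(tbl, sig1);
	b2 = IPV4_FRAG_TBL_POS(tbl, sig2);

	/*
	 * ... and each bucket holds <bucket_entries> consecutive slots:
	 * b1[0 .. bucket_entries - 1] and b2[0 .. bucket_entries - 1] are
	 * the 2 * <bucket_entries> possible locations for this key.
	 */
	(void)b1;
	(void)b2;
}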

/*
 * Update the table, after we finish processing its entry.
 */
static inline void
ipv4_frag_inuse(struct ipv4_frag_tbl *tbl, const struct ipv4_frag_pkt *fp)
{
	/* the entry's key was invalidated - take it off the LRU list. */
	if (IPV4_FRAG_KEY_EMPTY(&fp->key)) {
		TAILQ_REMOVE(&tbl->lru, fp, lru);
		tbl->use_entries--;
	}
}

/*
 * For the given key, try to find an existing entry.
 * If no such entry exists, return NULL and report a free and/or
 * timed-out entry (through *free and *stale) that can be used for
 * that key.
 */
static inline struct ipv4_frag_pkt *
ipv4_frag_lookup(struct ipv4_frag_tbl *tbl,
	const struct ipv4_frag_key *key, uint64_t tms,
	struct ipv4_frag_pkt **free, struct ipv4_frag_pkt **stale)
{
	struct ipv4_frag_pkt *p1, *p2;
	struct ipv4_frag_pkt *empty, *old;
	uint64_t max_cycles;
	uint32_t i, assoc, sig1, sig2;

	empty = NULL;
	old = NULL;

	max_cycles = tbl->max_cycles;
	assoc = tbl->bucket_entries;

	if (tbl->last != NULL && IPV4_FRAG_KEY_CMP(&tbl->last->key, key) == 0)
		return (tbl->last);

	ipv4_frag_hash(key, &sig1, &sig2);
	p1 = IPV4_FRAG_TBL_POS(tbl, sig1);
	p2 = IPV4_FRAG_TBL_POS(tbl, sig2);

	for (i = 0; i != assoc; i++) {

		IPV4_FRAG_LOG(DEBUG, "%s:%d:\n"
			"tbl: %p, max_entries: %u, use_entries: %u\n"
			"ipv4_frag_pkt line0: %p, index: %u from %u\n"
			"key: <%" PRIx64 ", %#x>, start: %" PRIu64 "\n",
			__func__, __LINE__,
			tbl, tbl->max_entries, tbl->use_entries,
			p1, i, assoc,
			p1[i].key.src_dst, p1[i].key.id, p1[i].start);

		if (IPV4_FRAG_KEY_CMP(&p1[i].key, key) == 0)
			return (p1 + i);
		else if (IPV4_FRAG_KEY_EMPTY(&p1[i].key))
			empty = (empty == NULL) ? (p1 + i) : empty;
		else if (max_cycles + p1[i].start < tms)
			old = (old == NULL) ? (p1 + i) : old;

		IPV4_FRAG_LOG(DEBUG, "%s:%d:\n"
			"tbl: %p, max_entries: %u, use_entries: %u\n"
			"ipv4_frag_pkt line1: %p, index: %u from %u\n"
			"key: <%" PRIx64 ", %#x>, start: %" PRIu64 "\n",
			__func__, __LINE__,
			tbl, tbl->max_entries, tbl->use_entries,
			p2, i, assoc,
			p2[i].key.src_dst, p2[i].key.id, p2[i].start);

		if (IPV4_FRAG_KEY_CMP(&p2[i].key, key) == 0)
			return (p2 + i);
		else if (IPV4_FRAG_KEY_EMPTY(&p2[i].key))
			empty = (empty == NULL) ? (p2 + i) : empty;
		else if (max_cycles + p2[i].start < tms)
			old = (old == NULL) ? (p2 + i) : old;
	}

	*free = empty;
	*stale = old;
	return (NULL);
}

static inline void
ipv4_frag_tbl_del(struct ipv4_frag_tbl *tbl, struct ipv4_frag_death_row *dr,
	struct ipv4_frag_pkt *fp)
{
	ipv4_frag_free(fp, dr);
	IPV4_FRAG_KEY_INVALIDATE(&fp->key);
	TAILQ_REMOVE(&tbl->lru, fp, lru);
	tbl->use_entries--;
	IPV4_FRAG_TBL_STAT_UPDATE(&tbl->stat, del_num, 1);
}

static inline void
ipv4_frag_tbl_add(struct ipv4_frag_tbl *tbl, struct ipv4_frag_pkt *fp,
	const struct ipv4_frag_key *key, uint64_t tms)
{
	fp->key = key[0];
	ipv4_frag_reset(fp, tms);
	TAILQ_INSERT_TAIL(&tbl->lru, fp, lru);
	tbl->use_entries++;
	IPV4_FRAG_TBL_STAT_UPDATE(&tbl->stat, add_num, 1);
}

static inline void
ipv4_frag_tbl_reuse(struct ipv4_frag_tbl *tbl, struct ipv4_frag_death_row *dr,
	struct ipv4_frag_pkt *fp, uint64_t tms)
{
	ipv4_frag_free(fp, dr);
	ipv4_frag_reset(fp, tms);
	/* move the entry to the tail of the LRU list. */
	TAILQ_REMOVE(&tbl->lru, fp, lru);
	TAILQ_INSERT_TAIL(&tbl->lru, fp, lru);
	IPV4_FRAG_TBL_STAT_UPDATE(&tbl->stat, reuse_num, 1);
}

/*
 * Find an entry in the table for the corresponding fragment.
 * If such an entry is not present, then allocate a new one.
 * If the entry is stale, then free and reuse it.
 */
static inline struct ipv4_frag_pkt *
ipv4_frag_find(struct ipv4_frag_tbl *tbl, struct ipv4_frag_death_row *dr,
	const struct ipv4_frag_key *key, uint64_t tms)
{
	struct ipv4_frag_pkt *pkt, *free, *stale, *lru;
	uint64_t max_cycles;

	/*
	 * Actually the two lines below are totally redundant;
	 * they are here just to make gcc 4.6 happy.
	 */
	free = NULL;
	stale = NULL;
	max_cycles = tbl->max_cycles;

	IPV4_FRAG_TBL_STAT_UPDATE(&tbl->stat, find_num, 1);

	if ((pkt = ipv4_frag_lookup(tbl, key, tms, &free, &stale)) == NULL) {

		/* timed-out entry, free and invalidate it. */
		if (stale != NULL) {
			ipv4_frag_tbl_del(tbl, dr, stale);
			free = stale;

		/*
		 * We found a free entry, check if we can use it.
		 * If we run out of free entries in the table, then
		 * check if we have a timed-out entry to delete.
		 */
		} else if (free != NULL &&
				tbl->max_entries <= tbl->use_entries) {
			lru = TAILQ_FIRST(&tbl->lru);
			if (max_cycles + lru->start < tms) {
				ipv4_frag_tbl_del(tbl, dr, lru);
			} else {
				free = NULL;
				IPV4_FRAG_TBL_STAT_UPDATE(&tbl->stat,
					fail_nospace, 1);
			}
		}

		/* found a free entry to reuse. */
		if (free != NULL) {
			ipv4_frag_tbl_add(tbl, free, key, tms);
			pkt = free;
		}

	/*
	 * We found the flow, but it is already timed out,
	 * so free associated resources, reposition it in the LRU list,
	 * and reuse it.
	 */
	} else if (max_cycles + pkt->start < tms) {
		ipv4_frag_tbl_reuse(tbl, dr, pkt, tms);
	}

	IPV4_FRAG_TBL_STAT_UPDATE(&tbl->stat, fail_total, (pkt == NULL));

	tbl->last = pkt;
	return (pkt);
}
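
/*
 * Illustrative sketch (hypothetical helper, not part of the original code):
 * typical per-fragment use of ipv4_frag_find(). The key layout
 * (src_dst, id) is assumed from its use in ipv4_frag_hash() above;
 * tms is the current TSC cycle count.
 */
static inline struct ipv4_frag_pkt *
example_find_flow(struct ipv4_frag_tbl *tbl, struct ipv4_frag_death_row *dr,
	uint64_t src_dst, uint32_t id, uint64_t tms)
{
	struct ipv4_frag_key key;

	key.src_dst = src_dst;
	key.id = id;

	/*
	 * Returns the (possibly new or reused) entry for this flow,
	 * or NULL when the table is full of live entries.
	 */
	return (ipv4_frag_find(tbl, dr, &key, tms));
}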

/*
 * Create a new IPv4 fragment table.
 * @param bucket_num
 *   Number of buckets in the hash table.
 * @param bucket_entries
 *   Number of entries per bucket (e.g. hash associativity).
 *   Should be a power of two.
 * @param max_entries
 *   Maximum number of entries that could be stored in the table.
 *   The value should be less than or equal to bucket_num * bucket_entries.
 * @param max_cycles
 *   Maximum TTL in cycles for each fragmented packet.
 * @param socket_id
 *   The *socket_id* argument is the socket identifier in the case of
 *   NUMA. The value can be *SOCKET_ID_ANY* if there are no NUMA constraints.
 * @return
 *   Pointer to the newly allocated fragment table on success. NULL on error.
 */
static struct ipv4_frag_tbl *
ipv4_frag_tbl_create(uint32_t bucket_num, uint32_t bucket_entries,
	uint32_t max_entries, uint64_t max_cycles, int socket_id)
{
	struct ipv4_frag_tbl *tbl;
	size_t sz;
	uint64_t nb_entries;

	nb_entries = rte_align32pow2(bucket_num);
	nb_entries *= bucket_entries;
	nb_entries *= IPV4_FRAG_HASH_FNUM;

	/* check input parameters. */
	if (rte_is_power_of_2(bucket_entries) == 0 ||
			nb_entries > UINT32_MAX || nb_entries == 0 ||
			nb_entries < max_entries) {
		RTE_LOG(ERR, USER1, "%s: invalid input parameter\n", __func__);
		return (NULL);
	}

	sz = sizeof (*tbl) + nb_entries * sizeof (tbl->pkt[0]);
	if ((tbl = rte_zmalloc_socket(__func__, sz, CACHE_LINE_SIZE,
			socket_id)) == NULL) {
		RTE_LOG(ERR, USER1,
			"%s: allocation of %zu bytes at socket %d failed\n",
			__func__, sz, socket_id);
		return (NULL);
	}

	RTE_LOG(INFO, USER1, "%s: allocated %zu bytes at socket %d\n",
		__func__, sz, socket_id);

	tbl->max_cycles = max_cycles;
	tbl->max_entries = max_entries;
	tbl->nb_entries = (uint32_t)nb_entries;
	tbl->nb_buckets = bucket_num;
	tbl->bucket_entries = bucket_entries;

	/* mask that maps a hash signature to the start of its bucket. */
	tbl->entry_mask = (tbl->nb_entries - 1) & ~(tbl->bucket_entries - 1);

	TAILQ_INIT(&(tbl->lru));
	return (tbl);
}
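
/*
 * Illustrative sketch (hypothetical values, not part of the original code):
 * creating a table with 2K buckets of 4-way associativity that may hold up
 * to 4K reassembly flows, each expiring after roughly 2 seconds. Assumes
 * rte_get_tsc_hz() from rte_cycles.h is available.
 */
static inline struct ipv4_frag_tbl *
example_tbl_create(int socket_id)
{
	/* convert the 2 second TTL into TSC cycles. */
	uint64_t frag_cycles = 2 * rte_get_tsc_hz();

	return (ipv4_frag_tbl_create(0x800, 4, 0x1000, frag_cycles,
		socket_id));
}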

static inline void
ipv4_frag_tbl_destroy(struct ipv4_frag_tbl *tbl)
{
	rte_free(tbl);
}

static void
ipv4_frag_tbl_dump_stat(FILE *f, const struct ipv4_frag_tbl *tbl)
{
	uint64_t fail_total, fail_nospace;

	fail_total = tbl->stat.fail_total;
	fail_nospace = tbl->stat.fail_nospace;

	fprintf(f, "max entries:\t%u;\n"
		"entries in use:\t%u;\n"
		"finds/inserts:\t%" PRIu64 ";\n"
		"entries added:\t%" PRIu64 ";\n"
		"entries deleted by timeout:\t%" PRIu64 ";\n"
		"entries reused by timeout:\t%" PRIu64 ";\n"
		"total add failures:\t%" PRIu64 ";\n"
		"add no-space failures:\t%" PRIu64 ";\n"
		"add hash-collision failures:\t%" PRIu64 ";\n",
		tbl->max_entries,
		tbl->use_entries,
		tbl->stat.find_num,
		tbl->stat.add_num,
		tbl->stat.del_num,
		tbl->stat.reuse_num,
		fail_total,
		fail_nospace,
		fail_total - fail_nospace);
}

#endif /* _IPV4_FRAG_TBL_H_ */