4 * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 #include <rte_byteorder.h>
38 #include <rte_jhash.h>
39 #ifdef RTE_MACHINE_CPUFLAG_SSE4_2
40 #include <rte_hash_crc.h>
41 #endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */
43 #include "rte_ip_frag.h"
44 #include "ip_frag_common.h"
46 #define PRIME_VALUE 0xeaad8405
48 #define IP_FRAG_TBL_POS(tbl, sig) \
49 ((tbl)->pkt + ((sig) & (tbl)->entry_mask))
51 #ifdef RTE_LIBRTE_IP_FRAG_TBL_STAT
52 #define IP_FRAG_TBL_STAT_UPDATE(s, f, v) ((s)->f += (v))
54 #define IP_FRAG_TBL_STAT_UPDATE(s, f, v) do {} while (0)
55 #endif /* IP_FRAG_TBL_STAT */
57 /* local frag table helper functions */
/*
 * Delete a reassembly entry from the fragment table: invalidate its key
 * (so lookups skip the slot) and unlink it from the LRU list, then bump
 * the table's deletion statistic.
 * NOTE(review): this extraction is missing source lines here (no braces,
 * and presumably an ip_frag_free(fp, dr) call that moves the entry's
 * mbufs onto the death row 'dr', plus a use_entries decrement) -- verify
 * against the complete file before editing.
 */
59 ip_frag_tbl_del(struct rte_ip_frag_tbl *tbl, struct rte_ip_frag_death_row *dr,
60 struct rte_ip_frag_pkt *fp)
63 ip_frag_key_invalidate(&fp->key);
64 TAILQ_REMOVE(&tbl->lru, fp, lru);
66 IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, del_num, 1);
/*
 * Claim a table slot for a new flow: reset the per-entry reassembly
 * state with arrival timestamp 'tms' and append the entry to the tail
 * (most-recently-used end) of the LRU list; bump the add statistic.
 * NOTE(review): source lines are missing from this extraction (the key
 * assignment from 'key' and a use_entries increment are not visible) --
 * confirm against the complete file.
 */
70 ip_frag_tbl_add(struct rte_ip_frag_tbl *tbl, struct rte_ip_frag_pkt *fp,
71 const struct ip_frag_key *key, uint64_t tms)
74 ip_frag_reset(fp, tms);
75 TAILQ_INSERT_TAIL(&tbl->lru, fp, lru);
77 IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, add_num, 1);
/*
 * Reuse a timed-out entry in place for the same key: reset its
 * reassembly state with the new timestamp and move it to the tail of
 * the LRU list (remove + re-insert); bump the reuse statistic.
 * NOTE(review): 'dr' is unused in the visible lines; presumably an
 * ip_frag_free(fp, dr) call on a line missing from this extraction
 * hands the stale fragments to the death row -- verify.
 */
81 ip_frag_tbl_reuse(struct rte_ip_frag_tbl *tbl, struct rte_ip_frag_death_row *dr,
82 struct rte_ip_frag_pkt *fp, uint64_t tms)
85 ip_frag_reset(fp, tms);
86 TAILQ_REMOVE(&tbl->lru, fp, lru);
87 TAILQ_INSERT_TAIL(&tbl->lru, fp, lru);
88 IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, reuse_num, 1);
/*
 * Compute two hash values (*v1, *v2) for an IPv4 fragment key
 * (src/dst address pair + packet id), used to address the two candidate
 * bucket positions in the fragment table (see IP_FRAG_TBL_POS usage in
 * ip_frag_lookup).  Uses hardware CRC32 when the CPU has SSE4.2,
 * Jenkins hash otherwise.
 * NOTE(review): declarations of 'v'/'p' and the '*v1 = v;' store are on
 * lines missing from this extraction.
 */
93 ipv4_frag_hash(const struct ip_frag_key *key, uint32_t *v1, uint32_t *v2)
98 p = (const uint32_t *)&key->src_dst;
100 #ifdef RTE_MACHINE_CPUFLAG_SSE4_2
101 v = rte_hash_crc_4byte(p[0], PRIME_VALUE);
102 v = rte_hash_crc_4byte(p[1], v);
103 v = rte_hash_crc_4byte(key->id, v);
106 v = rte_jhash_3words(p[0], p[1], key->id, PRIME_VALUE);
107 #endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */
/* derive the second signature from the first by mixing shifted copies */
110 *v2 = (v << 7) + (v >> 14);
/*
 * Compute two hash values (*v1, *v2) for an IPv6 fragment key: the
 * 32 bytes of src/dst addresses are consumed as eight 32-bit words,
 * followed by the fragment id.  CRC32 path when SSE4.2 is available,
 * Jenkins hash (three words per round) otherwise.
 * NOTE(review): declarations of 'v'/'p' and the '*v1 = v;' store are on
 * lines missing from this extraction.
 */
114 ipv6_frag_hash(const struct ip_frag_key *key, uint32_t *v1, uint32_t *v2)
119 p = (const uint32_t *) &key->src_dst;
121 #ifdef RTE_MACHINE_CPUFLAG_SSE4_2
122 v = rte_hash_crc_4byte(p[0], PRIME_VALUE);
123 v = rte_hash_crc_4byte(p[1], v);
124 v = rte_hash_crc_4byte(p[2], v);
125 v = rte_hash_crc_4byte(p[3], v);
126 v = rte_hash_crc_4byte(p[4], v);
127 v = rte_hash_crc_4byte(p[5], v);
128 v = rte_hash_crc_4byte(p[6], v);
129 v = rte_hash_crc_4byte(p[7], v);
130 v = rte_hash_crc_4byte(key->id, v);
133 v = rte_jhash_3words(p[0], p[1], p[2], PRIME_VALUE);
134 v = rte_jhash_3words(p[3], p[4], p[5], v);
135 v = rte_jhash_3words(p[6], p[7], key->id, v);
136 #endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */
/* derive the second signature from the first by mixing shifted copies */
139 *v2 = (v << 7) + (v >> 14);
/*
 * Record one received fragment (mbuf 'mb', byte offset 'ofs', payload
 * length 'len', more-fragments flag 'more_frags') into reassembly state
 * 'fp'.  Slot selection: the first fragment goes to IP_FIRST_FRAG_IDX
 * [NOTE(review): the 'ofs == 0' test line is missing from this
 * extraction], the last (more_frags == 0) to IP_LAST_FRAG_IDX, others
 * to the next free intermediate slot (fp->last_idx).  A duplicate
 * first/last fragment or slot exhaustion yields idx == UINT32_MAX /
 * out-of-range, which is treated as an error: the entry is freed onto
 * death row 'dr' and 'mb' is discarded via IP_FRAG_MBUF2DR.  Once
 * frag_size reaches total_size, reassembly is attempted; an
 * inconsistent set frees everything and invalidates the entry.
 * NOTE(review): several lines (braces, 'else', returns, the final
 * 'return mb;') are missing from this extraction -- verify control flow
 * against the complete file.
 */
143 ip_frag_process(struct rte_ip_frag_pkt *fp, struct rte_ip_frag_death_row *dr,
144 struct rte_mbuf *mb, uint16_t ofs, uint16_t len, uint16_t more_frags)
/* account the fragment's payload bytes toward the reassembled total */
148 fp->frag_size += len;
150 /* this is the first fragment. */
152 idx = (fp->frags[IP_FIRST_FRAG_IDX].mb == NULL) ?
153 IP_FIRST_FRAG_IDX : UINT32_MAX;
155 /* this is the last fragment. */
156 } else if (more_frags == 0) {
/* last fragment fixes the datagram's total payload size */
157 fp->total_size = ofs + len;
158 idx = (fp->frags[IP_LAST_FRAG_IDX].mb == NULL) ?
159 IP_LAST_FRAG_IDX : UINT32_MAX;
161 /* this is the intermediate fragment. */
162 } else if ((idx = fp->last_idx) <
163 sizeof (fp->frags) / sizeof (fp->frags[0])) {
168 * erroneous packet: either exceeded max allowed number of fragments,
169 * or duplicate first/last fragment encountered.
171 if (idx >= sizeof (fp->frags) / sizeof (fp->frags[0])) {
173 /* report an error. */
174 if (fp->key.key_len == IPV4_KEYLEN)
175 IP_FRAG_LOG(DEBUG, "%s:%d invalid fragmented packet:\n"
176 "ipv4_frag_pkt: %p, key: <%" PRIx64 ", %#x>, "
177 "total_size: %u, frag_size: %u, last_idx: %u\n"
178 "first fragment: ofs: %u, len: %u\n"
179 "last fragment: ofs: %u, len: %u\n\n",
181 fp, fp->key.src_dst[0], fp->key.id,
182 fp->total_size, fp->frag_size, fp->last_idx,
183 fp->frags[IP_FIRST_FRAG_IDX].ofs,
184 fp->frags[IP_FIRST_FRAG_IDX].len,
185 fp->frags[IP_LAST_FRAG_IDX].ofs,
186 fp->frags[IP_LAST_FRAG_IDX].len);
188 IP_FRAG_LOG(DEBUG, "%s:%d invalid fragmented packet:\n"
189 "ipv4_frag_pkt: %p, key: <" IPv6_KEY_BYTES_FMT ", %#x>, "
190 "total_size: %u, frag_size: %u, last_idx: %u\n"
191 "first fragment: ofs: %u, len: %u\n"
192 "last fragment: ofs: %u, len: %u\n\n",
194 fp, IPv6_KEY_BYTES(fp->key.src_dst), fp->key.id,
195 fp->total_size, fp->frag_size, fp->last_idx,
196 fp->frags[IP_FIRST_FRAG_IDX].ofs,
197 fp->frags[IP_FIRST_FRAG_IDX].len,
198 fp->frags[IP_LAST_FRAG_IDX].ofs,
199 fp->frags[IP_LAST_FRAG_IDX].len);
201 /* free all fragments, invalidate the entry. */
202 ip_frag_free(fp, dr);
203 ip_frag_key_invalidate(&fp->key);
/* the offending mbuf itself also goes to the death row */
204 IP_FRAG_MBUF2DR(dr, mb);
/* store the fragment in its chosen slot */
209 fp->frags[idx].ofs = ofs;
210 fp->frags[idx].len = len;
211 fp->frags[idx].mb = mb;
215 /* not all fragments are collected yet. */
216 if (likely (fp->frag_size < fp->total_size)) {
219 /* if we collected all fragments, then try to reassemble. */
220 } else if (fp->frag_size == fp->total_size &&
221 fp->frags[IP_FIRST_FRAG_IDX].mb != NULL) {
/* dispatch on key length: IPv4 and IPv6 rebuild headers differently */
222 if (fp->key.key_len == IPV4_KEYLEN)
223 mb = ipv4_frag_reassemble(fp);
225 mb = ipv6_frag_reassemble(fp);
228 /* erroneous set of fragments. */
231 /* report an error. */
232 if (fp->key.key_len == IPV4_KEYLEN)
233 IP_FRAG_LOG(DEBUG, "%s:%d invalid fragmented packet:\n"
234 "ipv4_frag_pkt: %p, key: <%" PRIx64 ", %#x>, "
235 "total_size: %u, frag_size: %u, last_idx: %u\n"
236 "first fragment: ofs: %u, len: %u\n"
237 "last fragment: ofs: %u, len: %u\n\n",
239 fp, fp->key.src_dst[0], fp->key.id,
240 fp->total_size, fp->frag_size, fp->last_idx,
241 fp->frags[IP_FIRST_FRAG_IDX].ofs,
242 fp->frags[IP_FIRST_FRAG_IDX].len,
243 fp->frags[IP_LAST_FRAG_IDX].ofs,
244 fp->frags[IP_LAST_FRAG_IDX].len);
246 IP_FRAG_LOG(DEBUG, "%s:%d invalid fragmented packet:\n"
247 "ipv4_frag_pkt: %p, key: <" IPv6_KEY_BYTES_FMT ", %#x>, "
248 "total_size: %u, frag_size: %u, last_idx: %u\n"
249 "first fragment: ofs: %u, len: %u\n"
250 "last fragment: ofs: %u, len: %u\n\n",
252 fp, IPv6_KEY_BYTES(fp->key.src_dst), fp->key.id,
253 fp->total_size, fp->frag_size, fp->last_idx,
254 fp->frags[IP_FIRST_FRAG_IDX].ofs,
255 fp->frags[IP_FIRST_FRAG_IDX].len,
256 fp->frags[IP_LAST_FRAG_IDX].ofs,
257 fp->frags[IP_LAST_FRAG_IDX].len);
259 /* free associated resources. */
260 ip_frag_free(fp, dr);
263 /* we are done with that entry, invalidate it. */
264 ip_frag_key_invalidate(&fp->key);
/*
 * ip_frag_find() -- locate (or create) the reassembly entry for 'key'.
 * Outcomes visible below: a stale (timed-out) colliding entry found by
 * ip_frag_lookup() is deleted and its slot recycled; when the table is
 * full, the head of the LRU list is evicted if it has timed out; an
 * existing-but-expired match for the same key is reused in place.
 * Returns the entry (NULL on failure) -- the return statement itself is
 * on a line missing from this extraction.
 */
270 * Find an entry in the table for the corresponding fragment.
271 * If such entry is not present, then allocate a new one.
272 * If the entry is stale, then free and reuse it.
274 struct rte_ip_frag_pkt *
275 ip_frag_find(struct rte_ip_frag_tbl *tbl, struct rte_ip_frag_death_row *dr,
276 const struct ip_frag_key *key, uint64_t tms)
278 struct rte_ip_frag_pkt *pkt, *free, *stale, *lru;
282 * Actually the two lines below are totally redundant.
283 * They are here, just to make gcc 4.6 happy.
287 max_cycles = tbl->max_cycles;
289 IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, find_num, 1);
/* no exact match: 'free' / 'stale' are candidate slots from the lookup */
291 if ((pkt = ip_frag_lookup(tbl, key, tms, &free, &stale)) == NULL) {
293 /*timed-out entry, free and invalidate it*/
295 ip_frag_tbl_del(tbl, dr, stale);
299 * we found a free entry, check if we can use it.
300 * If we run out of free entries in the table, then
301 * check if we have a timed out entry to delete.
303 } else if (free != NULL &&
304 tbl->max_entries <= tbl->use_entries) {
/* table full: evict the least-recently-used entry if it expired */
305 lru = TAILQ_FIRST(&tbl->lru);
306 if (max_cycles + lru->start < tms) {
307 ip_frag_tbl_del(tbl, dr, lru);
/* NOTE(review): the else-branch statistic name is cut off below --
 * presumably fail_nospace; confirm against the complete file. */
310 IP_FRAG_TBL_STAT_UPDATE(&tbl->stat,
315 /* found a free entry to reuse. */
317 ip_frag_tbl_add(tbl, free, key, tms);
322 * we found the flow, but it is already timed out,
323 * so free associated resources, reposition it in the LRU list,
326 } else if (max_cycles + pkt->start < tms) {
327 ip_frag_tbl_reuse(tbl, dr, pkt, tms);
/* count a failure whenever we end up without an entry */
330 IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, fail_total, (pkt == NULL));
/*
 * ip_frag_lookup() -- probe the two candidate buckets for 'key'.
 * Hashes the key into two signatures (sig1, sig2), then scans 'assoc'
 * entries in each of the two bucket lines (p1, p2): an exact key match
 * wins; otherwise the first empty slot is remembered in 'empty' and the
 * first timed-out slot in 'old', for the caller (ip_frag_find) to reuse
 * via the 'free' / 'stale' out-parameters.  A one-entry cache
 * (tbl->last) short-circuits repeated lookups of the same flow.
 * NOTE(review): this function continues past the end of this extraction
 * (the match/return paths and out-parameter stores are not visible),
 * and intermediate lines (declarations of max_cycles, empty/old init,
 * returns inside the loop) are missing -- do not restructure without
 * the complete file.
 * NOTE(review): the IPv4-branch debug strings below say
 * "ipv6_frag_pkt line0/line1" -- likely copy-paste residue, but they
 * are runtime strings, so left untouched here.
 */
336 struct rte_ip_frag_pkt *
337 ip_frag_lookup(struct rte_ip_frag_tbl *tbl,
338 const struct ip_frag_key *key, uint64_t tms,
339 struct rte_ip_frag_pkt **free, struct rte_ip_frag_pkt **stale)
341 struct rte_ip_frag_pkt *p1, *p2;
342 struct rte_ip_frag_pkt *empty, *old;
344 uint32_t i, assoc, sig1, sig2;
349 max_cycles = tbl->max_cycles;
350 assoc = tbl->bucket_entries;
/* fast path: same flow as the previous lookup */
352 if (tbl->last != NULL && ip_frag_key_cmp(&tbl->last->key, key) == 0)
355 /* different hashing methods for IPv4 and IPv6 */
356 if (key->key_len == 1)
357 ipv4_frag_hash(key, &sig1, &sig2);
359 ipv6_frag_hash(key, &sig1, &sig2);
/* the two candidate bucket lines addressed by the two signatures */
361 p1 = IP_FRAG_TBL_POS(tbl, sig1);
362 p2 = IP_FRAG_TBL_POS(tbl, sig2);
364 for (i = 0; i != assoc; i++) {
365 if (p1->key.key_len == IPV4_KEYLEN)
366 IP_FRAG_LOG(DEBUG, "%s:%d:\n"
367 "tbl: %p, max_entries: %u, use_entries: %u\n"
368 "ipv6_frag_pkt line0: %p, index: %u from %u\n"
369 "key: <%" PRIx64 ", %#x>, start: %" PRIu64 "\n",
371 tbl, tbl->max_entries, tbl->use_entries,
373 p1[i].key.src_dst[0], p1[i].key.id, p1[i].start);
375 IP_FRAG_LOG(DEBUG, "%s:%d:\n"
376 "tbl: %p, max_entries: %u, use_entries: %u\n"
377 "ipv6_frag_pkt line0: %p, index: %u from %u\n"
378 "key: <" IPv6_KEY_BYTES_FMT ", %#x>, start: %" PRIu64 "\n",
380 tbl, tbl->max_entries, tbl->use_entries,
382 IPv6_KEY_BYTES(p1[i].key.src_dst), p1[i].key.id, p1[i].start);
/* first bucket line: match > first-empty > first-timed-out, in that order */
384 if (ip_frag_key_cmp(&p1[i].key, key) == 0)
386 else if (ip_frag_key_is_empty(&p1[i].key))
387 empty = (empty == NULL) ? (p1 + i) : empty;
388 else if (max_cycles + p1[i].start < tms)
389 old = (old == NULL) ? (p1 + i) : old;
391 if (p2->key.key_len == IPV4_KEYLEN)
392 IP_FRAG_LOG(DEBUG, "%s:%d:\n"
393 "tbl: %p, max_entries: %u, use_entries: %u\n"
394 "ipv6_frag_pkt line1: %p, index: %u from %u\n"
395 "key: <%" PRIx64 ", %#x>, start: %" PRIu64 "\n",
397 tbl, tbl->max_entries, tbl->use_entries,
399 p2[i].key.src_dst[0], p2[i].key.id, p2[i].start);
401 IP_FRAG_LOG(DEBUG, "%s:%d:\n"
402 "tbl: %p, max_entries: %u, use_entries: %u\n"
403 "ipv6_frag_pkt line1: %p, index: %u from %u\n"
404 "key: <" IPv6_KEY_BYTES_FMT ", %#x>, start: %" PRIu64 "\n",
406 tbl, tbl->max_entries, tbl->use_entries,
408 IPv6_KEY_BYTES(p2[i].key.src_dst), p2[i].key.id, p2[i].start);
/* second bucket line: same match > empty > timed-out preference */
410 if (ip_frag_key_cmp(&p2[i].key, key) == 0)
412 else if (ip_frag_key_is_empty(&p2[i].key))
413 empty = (empty == NULL) ?( p2 + i) : empty;
414 else if (max_cycles + p2[i].start < tms)
415 old = (old == NULL) ? (p2 + i) : old;