/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <stddef.h>
#include <string.h>

#include <rte_jhash.h>
#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
#include <rte_hash_crc.h>
#endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */

#include "ip_frag_common.h"
/* Initial value fed to the fragment-key hash functions in this file. */
#define	PRIME_VALUE	0xeaad8405

/*
 * Address of the table entry selected by signature 'sig':
 * 'sig' is masked with the table's entry_mask and used as an offset
 * into the table's 'pkt' array.
 */
#define	IP_FRAG_TBL_POS(tbl, sig)	\
	((tbl)->pkt + ((sig) & (tbl)->entry_mask))

/*
 * Add 'v' to statistics field 'f' of '*s' when table statistics are
 * compiled in; otherwise expand to an empty statement (the arguments are
 * then not evaluated).
 */
#ifdef RTE_LIBRTE_IP_FRAG_TBL_STAT
#define	IP_FRAG_TBL_STAT_UPDATE(s, f, v)	((s)->f += (v))
#else
#define	IP_FRAG_TBL_STAT_UPDATE(s, f, v)	do {} while (0)
#endif /* RTE_LIBRTE_IP_FRAG_TBL_STAT */
54 /* local frag table helper functions */
56 ip_frag_tbl_del(struct rte_ip_frag_tbl *tbl, struct rte_ip_frag_death_row *dr,
57 struct ip_frag_pkt *fp)
60 ip_frag_key_invalidate(&fp->key);
61 TAILQ_REMOVE(&tbl->lru, fp, lru);
63 IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, del_num, 1);
67 ip_frag_tbl_add(struct rte_ip_frag_tbl *tbl, struct ip_frag_pkt *fp,
68 const struct ip_frag_key *key, uint64_t tms)
71 ip_frag_reset(fp, tms);
72 TAILQ_INSERT_TAIL(&tbl->lru, fp, lru);
74 IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, add_num, 1);
78 ip_frag_tbl_reuse(struct rte_ip_frag_tbl *tbl, struct rte_ip_frag_death_row *dr,
79 struct ip_frag_pkt *fp, uint64_t tms)
82 ip_frag_reset(fp, tms);
83 TAILQ_REMOVE(&tbl->lru, fp, lru);
84 TAILQ_INSERT_TAIL(&tbl->lru, fp, lru);
85 IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, reuse_num, 1);
90 ipv4_frag_hash(const struct ip_frag_key *key, uint32_t *v1, uint32_t *v2)
95 p = (const uint32_t *)&key->src_dst;
97 #ifdef RTE_MACHINE_CPUFLAG_SSE4_2
98 v = rte_hash_crc_4byte(p[0], PRIME_VALUE);
99 v = rte_hash_crc_4byte(p[1], v);
100 v = rte_hash_crc_4byte(key->id, v);
103 v = rte_jhash_3words(p[0], p[1], key->id, PRIME_VALUE);
104 #endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */
107 *v2 = (v << 7) + (v >> 14);
111 ipv6_frag_hash(const struct ip_frag_key *key, uint32_t *v1, uint32_t *v2)
116 p = (const uint32_t *) &key->src_dst;
118 #ifdef RTE_MACHINE_CPUFLAG_SSE4_2
119 v = rte_hash_crc_4byte(p[0], PRIME_VALUE);
120 v = rte_hash_crc_4byte(p[1], v);
121 v = rte_hash_crc_4byte(p[2], v);
122 v = rte_hash_crc_4byte(p[3], v);
123 v = rte_hash_crc_4byte(p[4], v);
124 v = rte_hash_crc_4byte(p[5], v);
125 v = rte_hash_crc_4byte(p[6], v);
126 v = rte_hash_crc_4byte(p[7], v);
127 v = rte_hash_crc_4byte(key->id, v);
130 v = rte_jhash_3words(p[0], p[1], p[2], PRIME_VALUE);
131 v = rte_jhash_3words(p[3], p[4], p[5], v);
132 v = rte_jhash_3words(p[6], p[7], key->id, v);
133 #endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */
136 *v2 = (v << 7) + (v >> 14);
140 ip_frag_process(struct ip_frag_pkt *fp, struct rte_ip_frag_death_row *dr,
141 struct rte_mbuf *mb, uint16_t ofs, uint16_t len, uint16_t more_frags)
145 fp->frag_size += len;
147 /* this is the first fragment. */
149 idx = (fp->frags[IP_FIRST_FRAG_IDX].mb == NULL) ?
150 IP_FIRST_FRAG_IDX : UINT32_MAX;
152 /* this is the last fragment. */
153 } else if (more_frags == 0) {
154 fp->total_size = ofs + len;
155 idx = (fp->frags[IP_LAST_FRAG_IDX].mb == NULL) ?
156 IP_LAST_FRAG_IDX : UINT32_MAX;
158 /* this is the intermediate fragment. */
159 } else if ((idx = fp->last_idx) <
160 sizeof (fp->frags) / sizeof (fp->frags[0])) {
165 * errorneous packet: either exceeed max allowed number of fragments,
166 * or duplicate first/last fragment encountered.
168 if (idx >= sizeof (fp->frags) / sizeof (fp->frags[0])) {
170 /* report an error. */
171 if (fp->key.key_len == IPV4_KEYLEN)
172 IP_FRAG_LOG(DEBUG, "%s:%d invalid fragmented packet:\n"
173 "ipv4_frag_pkt: %p, key: <%" PRIx64 ", %#x>, "
174 "total_size: %u, frag_size: %u, last_idx: %u\n"
175 "first fragment: ofs: %u, len: %u\n"
176 "last fragment: ofs: %u, len: %u\n\n",
178 fp, fp->key.src_dst[0], fp->key.id,
179 fp->total_size, fp->frag_size, fp->last_idx,
180 fp->frags[IP_FIRST_FRAG_IDX].ofs,
181 fp->frags[IP_FIRST_FRAG_IDX].len,
182 fp->frags[IP_LAST_FRAG_IDX].ofs,
183 fp->frags[IP_LAST_FRAG_IDX].len);
185 IP_FRAG_LOG(DEBUG, "%s:%d invalid fragmented packet:\n"
186 "ipv4_frag_pkt: %p, key: <" IPv6_KEY_BYTES_FMT ", %#x>, "
187 "total_size: %u, frag_size: %u, last_idx: %u\n"
188 "first fragment: ofs: %u, len: %u\n"
189 "last fragment: ofs: %u, len: %u\n\n",
191 fp, IPv6_KEY_BYTES(fp->key.src_dst), fp->key.id,
192 fp->total_size, fp->frag_size, fp->last_idx,
193 fp->frags[IP_FIRST_FRAG_IDX].ofs,
194 fp->frags[IP_FIRST_FRAG_IDX].len,
195 fp->frags[IP_LAST_FRAG_IDX].ofs,
196 fp->frags[IP_LAST_FRAG_IDX].len);
198 /* free all fragments, invalidate the entry. */
199 ip_frag_free(fp, dr);
200 ip_frag_key_invalidate(&fp->key);
201 IP_FRAG_MBUF2DR(dr, mb);
206 fp->frags[idx].ofs = ofs;
207 fp->frags[idx].len = len;
208 fp->frags[idx].mb = mb;
212 /* not all fragments are collected yet. */
213 if (likely (fp->frag_size < fp->total_size)) {
216 /* if we collected all fragments, then try to reassemble. */
217 } else if (fp->frag_size == fp->total_size &&
218 fp->frags[IP_FIRST_FRAG_IDX].mb != NULL) {
219 if (fp->key.key_len == IPV4_KEYLEN)
220 mb = ipv4_frag_reassemble(fp);
222 mb = ipv6_frag_reassemble(fp);
225 /* errorenous set of fragments. */
228 /* report an error. */
229 if (fp->key.key_len == IPV4_KEYLEN)
230 IP_FRAG_LOG(DEBUG, "%s:%d invalid fragmented packet:\n"
231 "ipv4_frag_pkt: %p, key: <%" PRIx64 ", %#x>, "
232 "total_size: %u, frag_size: %u, last_idx: %u\n"
233 "first fragment: ofs: %u, len: %u\n"
234 "last fragment: ofs: %u, len: %u\n\n",
236 fp, fp->key.src_dst[0], fp->key.id,
237 fp->total_size, fp->frag_size, fp->last_idx,
238 fp->frags[IP_FIRST_FRAG_IDX].ofs,
239 fp->frags[IP_FIRST_FRAG_IDX].len,
240 fp->frags[IP_LAST_FRAG_IDX].ofs,
241 fp->frags[IP_LAST_FRAG_IDX].len);
243 IP_FRAG_LOG(DEBUG, "%s:%d invalid fragmented packet:\n"
244 "ipv4_frag_pkt: %p, key: <" IPv6_KEY_BYTES_FMT ", %#x>, "
245 "total_size: %u, frag_size: %u, last_idx: %u\n"
246 "first fragment: ofs: %u, len: %u\n"
247 "last fragment: ofs: %u, len: %u\n\n",
249 fp, IPv6_KEY_BYTES(fp->key.src_dst), fp->key.id,
250 fp->total_size, fp->frag_size, fp->last_idx,
251 fp->frags[IP_FIRST_FRAG_IDX].ofs,
252 fp->frags[IP_FIRST_FRAG_IDX].len,
253 fp->frags[IP_LAST_FRAG_IDX].ofs,
254 fp->frags[IP_LAST_FRAG_IDX].len);
256 /* free associated resources. */
257 ip_frag_free(fp, dr);
260 /* we are done with that entry, invalidate it. */
261 ip_frag_key_invalidate(&fp->key);
267 * Find an entry in the table for the corresponding fragment.
268 * If such entry is not present, then allocate a new one.
269 * If the entry is stale, then free and reuse it.
272 ip_frag_find(struct rte_ip_frag_tbl *tbl, struct rte_ip_frag_death_row *dr,
273 const struct ip_frag_key *key, uint64_t tms)
275 struct ip_frag_pkt *pkt, *free, *stale, *lru;
279 * Actually the two line below are totally redundant.
280 * they are here, just to make gcc 4.6 happy.
284 max_cycles = tbl->max_cycles;
286 IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, find_num, 1);
288 if ((pkt = ip_frag_lookup(tbl, key, tms, &free, &stale)) == NULL) {
290 /*timed-out entry, free and invalidate it*/
292 ip_frag_tbl_del(tbl, dr, stale);
296 * we found a free entry, check if we can use it.
297 * If we run out of free entries in the table, then
298 * check if we have a timed out entry to delete.
300 } else if (free != NULL &&
301 tbl->max_entries <= tbl->use_entries) {
302 lru = TAILQ_FIRST(&tbl->lru);
303 if (max_cycles + lru->start < tms) {
304 ip_frag_tbl_del(tbl, dr, lru);
307 IP_FRAG_TBL_STAT_UPDATE(&tbl->stat,
312 /* found a free entry to reuse. */
314 ip_frag_tbl_add(tbl, free, key, tms);
319 * we found the flow, but it is already timed out,
320 * so free associated resources, reposition it in the LRU list,
323 } else if (max_cycles + pkt->start < tms) {
324 ip_frag_tbl_reuse(tbl, dr, pkt, tms);
327 IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, fail_total, (pkt == NULL));
334 ip_frag_lookup(struct rte_ip_frag_tbl *tbl,
335 const struct ip_frag_key *key, uint64_t tms,
336 struct ip_frag_pkt **free, struct ip_frag_pkt **stale)
338 struct ip_frag_pkt *p1, *p2;
339 struct ip_frag_pkt *empty, *old;
341 uint32_t i, assoc, sig1, sig2;
346 max_cycles = tbl->max_cycles;
347 assoc = tbl->bucket_entries;
349 if (tbl->last != NULL && ip_frag_key_cmp(key, &tbl->last->key) == 0)
352 /* different hashing methods for IPv4 and IPv6 */
353 if (key->key_len == IPV4_KEYLEN)
354 ipv4_frag_hash(key, &sig1, &sig2);
356 ipv6_frag_hash(key, &sig1, &sig2);
358 p1 = IP_FRAG_TBL_POS(tbl, sig1);
359 p2 = IP_FRAG_TBL_POS(tbl, sig2);
361 for (i = 0; i != assoc; i++) {
362 if (p1->key.key_len == IPV4_KEYLEN)
363 IP_FRAG_LOG(DEBUG, "%s:%d:\n"
364 "tbl: %p, max_entries: %u, use_entries: %u\n"
365 "ipv6_frag_pkt line0: %p, index: %u from %u\n"
366 "key: <%" PRIx64 ", %#x>, start: %" PRIu64 "\n",
368 tbl, tbl->max_entries, tbl->use_entries,
370 p1[i].key.src_dst[0], p1[i].key.id, p1[i].start);
372 IP_FRAG_LOG(DEBUG, "%s:%d:\n"
373 "tbl: %p, max_entries: %u, use_entries: %u\n"
374 "ipv6_frag_pkt line0: %p, index: %u from %u\n"
375 "key: <" IPv6_KEY_BYTES_FMT ", %#x>, start: %" PRIu64 "\n",
377 tbl, tbl->max_entries, tbl->use_entries,
379 IPv6_KEY_BYTES(p1[i].key.src_dst), p1[i].key.id, p1[i].start);
381 if (ip_frag_key_cmp(key, &p1[i].key) == 0)
383 else if (ip_frag_key_is_empty(&p1[i].key))
384 empty = (empty == NULL) ? (p1 + i) : empty;
385 else if (max_cycles + p1[i].start < tms)
386 old = (old == NULL) ? (p1 + i) : old;
388 if (p2->key.key_len == IPV4_KEYLEN)
389 IP_FRAG_LOG(DEBUG, "%s:%d:\n"
390 "tbl: %p, max_entries: %u, use_entries: %u\n"
391 "ipv6_frag_pkt line1: %p, index: %u from %u\n"
392 "key: <%" PRIx64 ", %#x>, start: %" PRIu64 "\n",
394 tbl, tbl->max_entries, tbl->use_entries,
396 p2[i].key.src_dst[0], p2[i].key.id, p2[i].start);
398 IP_FRAG_LOG(DEBUG, "%s:%d:\n"
399 "tbl: %p, max_entries: %u, use_entries: %u\n"
400 "ipv6_frag_pkt line1: %p, index: %u from %u\n"
401 "key: <" IPv6_KEY_BYTES_FMT ", %#x>, start: %" PRIu64 "\n",
403 tbl, tbl->max_entries, tbl->use_entries,
405 IPv6_KEY_BYTES(p2[i].key.src_dst), p2[i].key.id, p2[i].start);
407 if (ip_frag_key_cmp(key, &p2[i].key) == 0)
409 else if (ip_frag_key_is_empty(&p2[i].key))
410 empty = (empty == NULL) ?( p2 + i) : empty;
411 else if (max_cycles + p2[i].start < tms)
412 old = (old == NULL) ? (p2 + i) : old;