From: Anatoly Burakov Date: Wed, 28 May 2014 17:32:35 +0000 (+0100) Subject: ip_frag: move fragmentation/reassembly headers into a library X-Git-Tag: spdx-start~10722 X-Git-Url: http://git.droids-corp.org/?a=commitdiff_plain;h=601e279df074a339334885a6f19de9f6db988755;p=dpdk.git ip_frag: move fragmentation/reassembly headers into a library Signed-off-by: Anatoly Burakov Acked-by: Thomas Monjalon --- diff --git a/config/common_bsdapp b/config/common_bsdapp index ef8eeab1c7..574172137b 100644 --- a/config/common_bsdapp +++ b/config/common_bsdapp @@ -266,6 +266,11 @@ CONFIG_RTE_MAX_LCORE_FREQS=64 # CONFIG_RTE_LIBRTE_NET=y +# +# Compile librte_ip_frag +# +CONFIG_RTE_LIBRTE_IP_FRAG=y + # # Compile librte_meter # diff --git a/config/common_linuxapp b/config/common_linuxapp index 5453c57d86..b696322ed3 100644 --- a/config/common_linuxapp +++ b/config/common_linuxapp @@ -302,6 +302,11 @@ CONFIG_RTE_MAX_LCORE_FREQS=64 # CONFIG_RTE_LIBRTE_NET=y +# +# Compile librte_ip_frag +# +CONFIG_RTE_LIBRTE_IP_FRAG=y + # # Compile librte_meter # diff --git a/examples/ip_reassembly/ipv4_frag_tbl.h b/examples/ip_reassembly/ipv4_frag_tbl.h deleted file mode 100644 index c9eb50205d..0000000000 --- a/examples/ip_reassembly/ipv4_frag_tbl.h +++ /dev/null @@ -1,400 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. 
- * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _IPV4_FRAG_TBL_H_ -#define _IPV4_FRAG_TBL_H_ - -/** - * @file - * IPv4 fragments table. - * - * Implementation of IPv4 fragment table create/destroy/find/update. - * - */ - -/* - * The ipv4_frag_tbl is a simple hash table: - * The basic idea is to use two hash functions and - * associativity. This provides 2 * possible locations in - * the hash table for each key. Sort of simplified Cuckoo hashing, - * when the collision occurs and all 2 * are occupied, - * instead of resinserting existing keys into alternative locations, we just - * return a faiure. - * Another thing timing: entries that resides in the table longer then - * are considered as invalid, and could be removed/replaced - * byt the new ones. - * pair is stored together, all add/update/lookup opearions are not - * MT safe. 
- */ - -#include -#ifdef RTE_MACHINE_CPUFLAG_SSE4_2 -#include -#endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */ - -#define PRIME_VALUE 0xeaad8405 - -TAILQ_HEAD(ipv4_pkt_list, ipv4_frag_pkt); - -struct ipv4_frag_tbl_stat { - uint64_t find_num; /* total # of find/insert attempts. */ - uint64_t add_num; /* # of add ops. */ - uint64_t del_num; /* # of del ops. */ - uint64_t reuse_num; /* # of reuse (del/add) ops. */ - uint64_t fail_total; /* total # of add failures. */ - uint64_t fail_nospace; /* # of 'no space' add failures. */ -} __rte_cache_aligned; - -struct ipv4_frag_tbl { - uint64_t max_cycles; /* ttl for table entries. */ - uint32_t entry_mask; /* hash value mask. */ - uint32_t max_entries; /* max entries allowed. */ - uint32_t use_entries; /* entries in use. */ - uint32_t bucket_entries; /* hash assocaitivity. */ - uint32_t nb_entries; /* total size of the table. */ - uint32_t nb_buckets; /* num of associativity lines. */ - struct ipv4_frag_pkt *last; /* last used entry. */ - struct ipv4_pkt_list lru; /* LRU list for table entries. */ - struct ipv4_frag_tbl_stat stat; /* statistics counters. */ - struct ipv4_frag_pkt pkt[0]; /* hash table. 
*/ -}; - -#define IPV4_FRAG_TBL_POS(tbl, sig) \ - ((tbl)->pkt + ((sig) & (tbl)->entry_mask)) - -#define IPV4_FRAG_HASH_FNUM 2 - -#ifdef IPV4_FRAG_TBL_STAT -#define IPV4_FRAG_TBL_STAT_UPDATE(s, f, v) ((s)->f += (v)) -#else -#define IPV4_FRAG_TBL_STAT_UPDATE(s, f, v) do {} while (0) -#endif /* IPV4_FRAG_TBL_STAT */ - -static inline void -ipv4_frag_hash(const struct ipv4_frag_key *key, uint32_t *v1, uint32_t *v2) -{ - uint32_t v; - const uint32_t *p; - - p = (const uint32_t *)&key->src_dst; - -#ifdef RTE_MACHINE_CPUFLAG_SSE4_2 - v = rte_hash_crc_4byte(p[0], PRIME_VALUE); - v = rte_hash_crc_4byte(p[1], v); - v = rte_hash_crc_4byte(key->id, v); -#else - - v = rte_jhash_3words(p[0], p[1], key->id, PRIME_VALUE); -#endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */ - - *v1 = v; - *v2 = (v << 7) + (v >> 14); -} - -/* - * Update the table, after we finish processing it's entry. - */ -static inline void -ipv4_frag_inuse(struct ipv4_frag_tbl *tbl, const struct ipv4_frag_pkt *fp) -{ - if (IPV4_FRAG_KEY_EMPTY(&fp->key)) { - TAILQ_REMOVE(&tbl->lru, fp, lru); - tbl->use_entries--; - } -} - -/* - * For the given key, try to find an existing entry. - * If such entry doesn't exist, will return free and/or timed-out entry, - * that can be used for that key. 
- */ -static inline struct ipv4_frag_pkt * -ipv4_frag_lookup(struct ipv4_frag_tbl *tbl, - const struct ipv4_frag_key *key, uint64_t tms, - struct ipv4_frag_pkt **free, struct ipv4_frag_pkt **stale) -{ - struct ipv4_frag_pkt *p1, *p2; - struct ipv4_frag_pkt *empty, *old; - uint64_t max_cycles; - uint32_t i, assoc, sig1, sig2; - - empty = NULL; - old = NULL; - - max_cycles = tbl->max_cycles; - assoc = tbl->bucket_entries; - - if (tbl->last != NULL && IPV4_FRAG_KEY_CMP(&tbl->last->key, key) == 0) - return (tbl->last); - - ipv4_frag_hash(key, &sig1, &sig2); - p1 = IPV4_FRAG_TBL_POS(tbl, sig1); - p2 = IPV4_FRAG_TBL_POS(tbl, sig2); - - for (i = 0; i != assoc; i++) { - - IPV4_FRAG_LOG(DEBUG, "%s:%d:\n" - "tbl: %p, max_entries: %u, use_entries: %u\n" - "ipv4_frag_pkt line0: %p, index: %u from %u\n" - "key: <%" PRIx64 ", %#x>, start: %" PRIu64 "\n", - __func__, __LINE__, - tbl, tbl->max_entries, tbl->use_entries, - p1, i, assoc, - p1[i].key.src_dst, p1[i].key.id, p1[i].start); - - if (IPV4_FRAG_KEY_CMP(&p1[i].key, key) == 0) - return (p1 + i); - else if (IPV4_FRAG_KEY_EMPTY(&p1[i].key)) - empty = (empty == NULL) ? (p1 + i) : empty; - else if (max_cycles + p1[i].start < tms) - old = (old == NULL) ? (p1 + i) : old; - - IPV4_FRAG_LOG(DEBUG, "%s:%d:\n" - "tbl: %p, max_entries: %u, use_entries: %u\n" - "ipv4_frag_pkt line1: %p, index: %u from %u\n" - "key: <%" PRIx64 ", %#x>, start: %" PRIu64 "\n", - __func__, __LINE__, - tbl, tbl->max_entries, tbl->use_entries, - p2, i, assoc, - p2[i].key.src_dst, p2[i].key.id, p2[i].start); - - if (IPV4_FRAG_KEY_CMP(&p2[i].key, key) == 0) - return (p2 + i); - else if (IPV4_FRAG_KEY_EMPTY(&p2[i].key)) - empty = (empty == NULL) ?( p2 + i) : empty; - else if (max_cycles + p2[i].start < tms) - old = (old == NULL) ? 
(p2 + i) : old; - } - - *free = empty; - *stale = old; - return (NULL); -} - -static inline void -ipv4_frag_tbl_del(struct ipv4_frag_tbl *tbl, struct ipv4_frag_death_row *dr, - struct ipv4_frag_pkt *fp) -{ - ipv4_frag_free(fp, dr); - IPV4_FRAG_KEY_INVALIDATE(&fp->key); - TAILQ_REMOVE(&tbl->lru, fp, lru); - tbl->use_entries--; - IPV4_FRAG_TBL_STAT_UPDATE(&tbl->stat, del_num, 1); -} - -static inline void -ipv4_frag_tbl_add(struct ipv4_frag_tbl *tbl, struct ipv4_frag_pkt *fp, - const struct ipv4_frag_key *key, uint64_t tms) -{ - fp->key = key[0]; - ipv4_frag_reset(fp, tms); - TAILQ_INSERT_TAIL(&tbl->lru, fp, lru); - tbl->use_entries++; - IPV4_FRAG_TBL_STAT_UPDATE(&tbl->stat, add_num, 1); -} - -static inline void -ipv4_frag_tbl_reuse(struct ipv4_frag_tbl *tbl, struct ipv4_frag_death_row *dr, - struct ipv4_frag_pkt *fp, uint64_t tms) -{ - ipv4_frag_free(fp, dr); - ipv4_frag_reset(fp, tms); - TAILQ_REMOVE(&tbl->lru, fp, lru); - TAILQ_INSERT_TAIL(&tbl->lru, fp, lru); - IPV4_FRAG_TBL_STAT_UPDATE(&tbl->stat, reuse_num, 1); -} - -/* - * Find an entry in the table for the corresponding fragment. - * If such entry is not present, then allocate a new one. - * If the entry is stale, then free and reuse it. - */ -static inline struct ipv4_frag_pkt * -ipv4_frag_find(struct ipv4_frag_tbl *tbl, struct ipv4_frag_death_row *dr, - const struct ipv4_frag_key *key, uint64_t tms) -{ - struct ipv4_frag_pkt *pkt, *free, *stale, *lru; - uint64_t max_cycles; - - /* - * Actually the two line below are totally redundant. - * they are here, just to make gcc 4.6 happy. - */ - free = NULL; - stale = NULL; - max_cycles = tbl->max_cycles; - - IPV4_FRAG_TBL_STAT_UPDATE(&tbl->stat, find_num, 1); - - if ((pkt = ipv4_frag_lookup(tbl, key, tms, &free, &stale)) == NULL) { - - /*timed-out entry, free and invalidate it*/ - if (stale != NULL) { - ipv4_frag_tbl_del(tbl, dr, stale); - free = stale; - - /* - * we found a free entry, check if we can use it. 
- * If we run out of free entries in the table, then - * check if we have a timed out entry to delete. - */ - } else if (free != NULL && - tbl->max_entries <= tbl->use_entries) { - lru = TAILQ_FIRST(&tbl->lru); - if (max_cycles + lru->start < tms) { - ipv4_frag_tbl_del(tbl, dr, lru); - } else { - free = NULL; - IPV4_FRAG_TBL_STAT_UPDATE(&tbl->stat, - fail_nospace, 1); - } - } - - /* found a free entry to reuse. */ - if (free != NULL) { - ipv4_frag_tbl_add(tbl, free, key, tms); - pkt = free; - } - - /* - * we found the flow, but it is already timed out, - * so free associated resources, reposition it in the LRU list, - * and reuse it. - */ - } else if (max_cycles + pkt->start < tms) { - ipv4_frag_tbl_reuse(tbl, dr, pkt, tms); - } - - IPV4_FRAG_TBL_STAT_UPDATE(&tbl->stat, fail_total, (pkt == NULL)); - - tbl->last = pkt; - return (pkt); -} - -/* - * Create a new IPV4 Frag table. - * @param bucket_num - * Number of buckets in the hash table. - * @param bucket_entries - * Number of entries per bucket (e.g. hash associativity). - * Should be power of two. - * @param max_entries - * Maximum number of entries that could be stored in the table. - * The value should be less or equal then bucket_num * bucket_entries. - * @param max_cycles - * Maximum TTL in cycles for each fragmented packet. - * @param socket_id - * The *socket_id* argument is the socket identifier in the case of - * NUMA. The value can be *SOCKET_ID_ANY* if there is no NUMA constraints. - * @return - * The pointer to the new allocated mempool, on success. NULL on error. - */ -static struct ipv4_frag_tbl * -ipv4_frag_tbl_create(uint32_t bucket_num, uint32_t bucket_entries, - uint32_t max_entries, uint64_t max_cycles, int socket_id) -{ - struct ipv4_frag_tbl *tbl; - size_t sz; - uint64_t nb_entries; - - nb_entries = rte_align32pow2(bucket_num); - nb_entries *= bucket_entries; - nb_entries *= IPV4_FRAG_HASH_FNUM; - - /* check input parameters. 
*/ - if (rte_is_power_of_2(bucket_entries) == 0 || - nb_entries > UINT32_MAX || nb_entries == 0 || - nb_entries < max_entries) { - RTE_LOG(ERR, USER1, "%s: invalid input parameter\n", __func__); - return (NULL); - } - - sz = sizeof (*tbl) + nb_entries * sizeof (tbl->pkt[0]); - if ((tbl = rte_zmalloc_socket(__func__, sz, CACHE_LINE_SIZE, - socket_id)) == NULL) { - RTE_LOG(ERR, USER1, - "%s: allocation of %zu bytes at socket %d failed do\n", - __func__, sz, socket_id); - return (NULL); - } - - RTE_LOG(INFO, USER1, "%s: allocated of %zu bytes at socket %d\n", - __func__, sz, socket_id); - - tbl->max_cycles = max_cycles; - tbl->max_entries = max_entries; - tbl->nb_entries = (uint32_t)nb_entries; - tbl->nb_buckets = bucket_num; - tbl->bucket_entries = bucket_entries; - tbl->entry_mask = (tbl->nb_entries - 1) & ~(tbl->bucket_entries - 1); - - TAILQ_INIT(&(tbl->lru)); - return (tbl); -} - -static inline void -ipv4_frag_tbl_destroy( struct ipv4_frag_tbl *tbl) -{ - rte_free(tbl); -} - -static void -ipv4_frag_tbl_dump_stat(FILE *f, const struct ipv4_frag_tbl *tbl) -{ - uint64_t fail_total, fail_nospace; - - fail_total = tbl->stat.fail_total; - fail_nospace = tbl->stat.fail_nospace; - - fprintf(f, "max entries:\t%u;\n" - "entries in use:\t%u;\n" - "finds/inserts:\t%" PRIu64 ";\n" - "entries added:\t%" PRIu64 ";\n" - "entries deleted by timeout:\t%" PRIu64 ";\n" - "entries reused by timeout:\t%" PRIu64 ";\n" - "total add failures:\t%" PRIu64 ";\n" - "add no-space failures:\t%" PRIu64 ";\n" - "add hash-collisions failures:\t%" PRIu64 ";\n", - tbl->max_entries, - tbl->use_entries, - tbl->stat.find_num, - tbl->stat.add_num, - tbl->stat.del_num, - tbl->stat.reuse_num, - fail_total, - fail_nospace, - fail_total - fail_nospace); -} - - -#endif /* _IPV4_FRAG_TBL_H_ */ diff --git a/examples/ip_reassembly/ipv4_rsmbl.h b/examples/ip_reassembly/ipv4_rsmbl.h deleted file mode 100644 index 61a70ad5b2..0000000000 --- a/examples/ip_reassembly/ipv4_rsmbl.h +++ /dev/null @@ -1,425 +0,0 @@ -/*- 
- * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _IPV4_RSMBL_H_ -#define _IPV4_RSMBL_H_ - -/** - * @file - * IPv4 reassemble - * - * Implementation of IPv4 reassemble. - * - */ - -enum { - LAST_FRAG_IDX, - FIRST_FRAG_IDX, - MIN_FRAG_NUM, - MAX_FRAG_NUM = 4, -}; - -struct ipv4_frag { - uint16_t ofs; - uint16_t len; - struct rte_mbuf *mb; -}; - -/* - * Use to uniquely indetify fragmented datagram. 
- */ -struct ipv4_frag_key { - uint64_t src_dst; - uint32_t id; -}; - -#define IPV4_FRAG_KEY_INVALIDATE(k) ((k)->src_dst = 0) -#define IPV4_FRAG_KEY_EMPTY(k) ((k)->src_dst == 0) - -#define IPV4_FRAG_KEY_CMP(k1, k2) \ - (((k1)->src_dst ^ (k2)->src_dst) | ((k1)->id ^ (k2)->id)) - - -/* - * Fragmented packet to reassemble. - * First two entries in the frags[] array are for the last and first fragments. - */ -struct ipv4_frag_pkt { - TAILQ_ENTRY(ipv4_frag_pkt) lru; /* LRU list */ - struct ipv4_frag_key key; - uint64_t start; /* creation timestamp */ - uint32_t total_size; /* expected reassembled size */ - uint32_t frag_size; /* size of fragments received */ - uint32_t last_idx; /* index of next entry to fill */ - struct ipv4_frag frags[MAX_FRAG_NUM]; -} __rte_cache_aligned; - - -struct ipv4_frag_death_row { - uint32_t cnt; - struct rte_mbuf *row[MAX_PKT_BURST * (MAX_FRAG_NUM + 1)]; -}; - -#define IPV4_FRAG_MBUF2DR(dr, mb) ((dr)->row[(dr)->cnt++] = (mb)) - -/* logging macros. */ - -#ifdef IPV4_FRAG_DEBUG -#define IPV4_FRAG_LOG(lvl, fmt, args...) RTE_LOG(lvl, USER1, fmt, ##args) -#else -#define IPV4_FRAG_LOG(lvl, fmt, args...) 
do {} while(0) -#endif /* IPV4_FRAG_DEBUG */ - - -static inline void -ipv4_frag_reset(struct ipv4_frag_pkt *fp, uint64_t tms) -{ - static const struct ipv4_frag zero_frag = { - .ofs = 0, - .len = 0, - .mb = NULL, - }; - - fp->start = tms; - fp->total_size = UINT32_MAX; - fp->frag_size = 0; - fp->last_idx = MIN_FRAG_NUM; - fp->frags[LAST_FRAG_IDX] = zero_frag; - fp->frags[FIRST_FRAG_IDX] = zero_frag; -} - -static inline void -ipv4_frag_free(struct ipv4_frag_pkt *fp, struct ipv4_frag_death_row *dr) -{ - uint32_t i, k; - - k = dr->cnt; - for (i = 0; i != fp->last_idx; i++) { - if (fp->frags[i].mb != NULL) { - dr->row[k++] = fp->frags[i].mb; - fp->frags[i].mb = NULL; - } - } - - fp->last_idx = 0; - dr->cnt = k; -} - -static inline void -ipv4_frag_free_death_row(struct ipv4_frag_death_row *dr, uint32_t prefetch) -{ - uint32_t i, k, n; - - k = RTE_MIN(prefetch, dr->cnt); - n = dr->cnt; - - for (i = 0; i != k; i++) - rte_prefetch0(dr->row[i]); - - for (i = 0; i != n - k; i++) { - rte_prefetch0(dr->row[i + k]); - rte_pktmbuf_free(dr->row[i]); - } - - for (; i != n; i++) - rte_pktmbuf_free(dr->row[i]); - - dr->cnt = 0; -} - -/* - * Helper function. - * Takes 2 mbufs that represents two framents of the same packet and - * chains them into one mbuf. - */ -static inline void -ipv4_frag_chain(struct rte_mbuf *mn, struct rte_mbuf *mp) -{ - struct rte_mbuf *ms; - - /* adjust start of the last fragment data. */ - rte_pktmbuf_adj(mp, (uint16_t)(mp->pkt.vlan_macip.f.l2_len + - mp->pkt.vlan_macip.f.l3_len)); - - /* chain two fragments. */ - ms = rte_pktmbuf_lastseg(mn); - ms->pkt.next = mp; - - /* accumulate number of segments and total length. */ - mn->pkt.nb_segs = (uint8_t)(mn->pkt.nb_segs + mp->pkt.nb_segs); - mn->pkt.pkt_len += mp->pkt.pkt_len; - - /* reset pkt_len and nb_segs for chained fragment. */ - mp->pkt.pkt_len = mp->pkt.data_len; - mp->pkt.nb_segs = 1; -} - -/* - * Reassemble fragments into one packet. 
- */ -static inline struct rte_mbuf * -ipv4_frag_reassemble(const struct ipv4_frag_pkt *fp) -{ - struct ipv4_hdr *ip_hdr; - struct rte_mbuf *m, *prev; - uint32_t i, n, ofs, first_len; - - first_len = fp->frags[FIRST_FRAG_IDX].len; - n = fp->last_idx - 1; - - /*start from the last fragment. */ - m = fp->frags[LAST_FRAG_IDX].mb; - ofs = fp->frags[LAST_FRAG_IDX].ofs; - - while (ofs != first_len) { - - prev = m; - - for (i = n; i != FIRST_FRAG_IDX && ofs != first_len; i--) { - - /* previous fragment found. */ - if(fp->frags[i].ofs + fp->frags[i].len == ofs) { - - ipv4_frag_chain(fp->frags[i].mb, m); - - /* update our last fragment and offset. */ - m = fp->frags[i].mb; - ofs = fp->frags[i].ofs; - } - } - - /* error - hole in the packet. */ - if (m == prev) { - return (NULL); - } - } - - /* chain with the first fragment. */ - ipv4_frag_chain(fp->frags[FIRST_FRAG_IDX].mb, m); - m = fp->frags[FIRST_FRAG_IDX].mb; - - /* update mbuf fields for reassembled packet. */ - m->ol_flags |= PKT_TX_IP_CKSUM; - - /* update ipv4 header for the reassmebled packet */ - ip_hdr = (struct ipv4_hdr*)(rte_pktmbuf_mtod(m, uint8_t *) + - m->pkt.vlan_macip.f.l2_len); - - ip_hdr->total_length = rte_cpu_to_be_16((uint16_t)(fp->total_size + - m->pkt.vlan_macip.f.l3_len)); - ip_hdr->fragment_offset = (uint16_t)(ip_hdr->fragment_offset & - rte_cpu_to_be_16(IPV4_HDR_DF_FLAG)); - ip_hdr->hdr_checksum = 0; - - return (m); -} - -static inline struct rte_mbuf * -ipv4_frag_process(struct ipv4_frag_pkt *fp, struct ipv4_frag_death_row *dr, - struct rte_mbuf *mb, uint16_t ofs, uint16_t len, uint16_t more_frags) -{ - uint32_t idx; - - fp->frag_size += len; - - /* this is the first fragment. */ - if (ofs == 0) { - idx = (fp->frags[FIRST_FRAG_IDX].mb == NULL) ? - FIRST_FRAG_IDX : UINT32_MAX; - - /* this is the last fragment. */ - } else if (more_frags == 0) { - fp->total_size = ofs + len; - idx = (fp->frags[LAST_FRAG_IDX].mb == NULL) ? - LAST_FRAG_IDX : UINT32_MAX; - - /* this is the intermediate fragment. 
*/ - } else if ((idx = fp->last_idx) < - sizeof (fp->frags) / sizeof (fp->frags[0])) { - fp->last_idx++; - } - - /* - * errorneous packet: either exceeed max allowed number of fragments, - * or duplicate first/last fragment encountered. - */ - if (idx >= sizeof (fp->frags) / sizeof (fp->frags[0])) { - - /* report an error. */ - IPV4_FRAG_LOG(DEBUG, "%s:%d invalid fragmented packet:\n" - "ipv4_frag_pkt: %p, key: <%" PRIx64 ", %#x>, " - "total_size: %u, frag_size: %u, last_idx: %u\n" - "first fragment: ofs: %u, len: %u\n" - "last fragment: ofs: %u, len: %u\n\n", - __func__, __LINE__, - fp, fp->key.src_dst, fp->key.id, - fp->total_size, fp->frag_size, fp->last_idx, - fp->frags[FIRST_FRAG_IDX].ofs, - fp->frags[FIRST_FRAG_IDX].len, - fp->frags[LAST_FRAG_IDX].ofs, - fp->frags[LAST_FRAG_IDX].len); - - /* free all fragments, invalidate the entry. */ - ipv4_frag_free(fp, dr); - IPV4_FRAG_KEY_INVALIDATE(&fp->key); - IPV4_FRAG_MBUF2DR(dr, mb); - - return (NULL); - } - - fp->frags[idx].ofs = ofs; - fp->frags[idx].len = len; - fp->frags[idx].mb = mb; - - mb = NULL; - - /* not all fragments are collected yet. */ - if (likely (fp->frag_size < fp->total_size)) { - return (mb); - - /* if we collected all fragments, then try to reassemble. */ - } else if (fp->frag_size == fp->total_size && - fp->frags[FIRST_FRAG_IDX].mb != NULL) { - mb = ipv4_frag_reassemble(fp); - } - - /* errorenous set of fragments. */ - if (mb == NULL) { - - /* report an error. */ - IPV4_FRAG_LOG(DEBUG, "%s:%d invalid fragmented packet:\n" - "ipv4_frag_pkt: %p, key: <%" PRIx64 ", %#x>, " - "total_size: %u, frag_size: %u, last_idx: %u\n" - "first fragment: ofs: %u, len: %u\n" - "last fragment: ofs: %u, len: %u\n\n", - __func__, __LINE__, - fp, fp->key.src_dst, fp->key.id, - fp->total_size, fp->frag_size, fp->last_idx, - fp->frags[FIRST_FRAG_IDX].ofs, - fp->frags[FIRST_FRAG_IDX].len, - fp->frags[LAST_FRAG_IDX].ofs, - fp->frags[LAST_FRAG_IDX].len); - - /* free associated resources. 
*/ - ipv4_frag_free(fp, dr); - } - - /* we are done with that entry, invalidate it. */ - IPV4_FRAG_KEY_INVALIDATE(&fp->key); - return (mb); -} - -#include "ipv4_frag_tbl.h" - -/* - * Process new mbuf with fragment of IPV4 packet. - * Incoming mbuf should have it's l2_len/l3_len fields setuped correclty. - * @param tbl - * Table where to lookup/add the fragmented packet. - * @param mb - * Incoming mbuf with IPV4 fragment. - * @param tms - * Fragment arrival timestamp. - * @param ip_hdr - * Pointer to the IPV4 header inside the fragment. - * @param ip_ofs - * Fragment's offset (as extracted from the header). - * @param ip_flag - * Fragment's MF flag. - * @return - * Pointer to mbuf for reassebled packet, or NULL if: - * - an error occured. - * - not all fragments of the packet are collected yet. - */ -static inline struct rte_mbuf * -ipv4_frag_mbuf(struct ipv4_frag_tbl *tbl, struct ipv4_frag_death_row *dr, - struct rte_mbuf *mb, uint64_t tms, struct ipv4_hdr *ip_hdr, - uint16_t ip_ofs, uint16_t ip_flag) -{ - struct ipv4_frag_pkt *fp; - struct ipv4_frag_key key; - const uint64_t *psd; - uint16_t ip_len; - - psd = (uint64_t *)&ip_hdr->src_addr; - key.src_dst = psd[0]; - key.id = ip_hdr->packet_id; - - ip_ofs *= IPV4_HDR_OFFSET_UNITS; - ip_len = (uint16_t)(rte_be_to_cpu_16(ip_hdr->total_length) - - mb->pkt.vlan_macip.f.l3_len); - - IPV4_FRAG_LOG(DEBUG, "%s:%d:\n" - "mbuf: %p, tms: %" PRIu64 - ", key: <%" PRIx64 ", %#x>, ofs: %u, len: %u, flags: %#x\n" - "tbl: %p, max_cycles: %" PRIu64 ", entry_mask: %#x, " - "max_entries: %u, use_entries: %u\n\n", - __func__, __LINE__, - mb, tms, key.src_dst, key.id, ip_ofs, ip_len, ip_flag, - tbl, tbl->max_cycles, tbl->entry_mask, tbl->max_entries, - tbl->use_entries); - - /* try to find/add entry into the fragment's table. 
*/ - if ((fp = ipv4_frag_find(tbl, dr, &key, tms)) == NULL) { - IPV4_FRAG_MBUF2DR(dr, mb); - return (NULL); - } - - IPV4_FRAG_LOG(DEBUG, "%s:%d:\n" - "tbl: %p, max_entries: %u, use_entries: %u\n" - "ipv4_frag_pkt: %p, key: <%" PRIx64 ", %#x>, start: %" PRIu64 - ", total_size: %u, frag_size: %u, last_idx: %u\n\n", - __func__, __LINE__, - tbl, tbl->max_entries, tbl->use_entries, - fp, fp->key.src_dst, fp->key.id, fp->start, - fp->total_size, fp->frag_size, fp->last_idx); - - - /* process the fragmented packet. */ - mb = ipv4_frag_process(fp, dr, mb, ip_ofs, ip_len, ip_flag); - ipv4_frag_inuse(tbl, fp); - - IPV4_FRAG_LOG(DEBUG, "%s:%d:\n" - "mbuf: %p\n" - "tbl: %p, max_entries: %u, use_entries: %u\n" - "ipv4_frag_pkt: %p, key: <%" PRIx64 ", %#x>, start: %" PRIu64 - ", total_size: %u, frag_size: %u, last_idx: %u\n\n", - __func__, __LINE__, mb, - tbl, tbl->max_entries, tbl->use_entries, - fp, fp->key.src_dst, fp->key.id, fp->start, - fp->total_size, fp->frag_size, fp->last_idx); - - return (mb); -} - -#endif /* _IPV4_RSMBL_H_ */ diff --git a/examples/ip_reassembly/main.c b/examples/ip_reassembly/main.c index c4427b3a23..a38733c285 100644 --- a/examples/ip_reassembly/main.c +++ b/examples/ip_reassembly/main.c @@ -94,7 +94,7 @@ #define MAX_PKT_BURST 32 -#include "ipv4_rsmbl.h" +#include "rte_ipv4_rsmbl.h" #ifndef IPv6_BYTES #define IPv6_BYTES_FMT "%02x%02x:%02x%02x:%02x%02x:%02x%02x:"\ diff --git a/examples/ipv4_frag/main.c b/examples/ipv4_frag/main.c index 3250540389..ff6001538d 100644 --- a/examples/ipv4_frag/main.c +++ b/examples/ipv4_frag/main.c @@ -71,7 +71,7 @@ #include #include -#include "rte_ipv4_frag.h" +#include "rte_ip_frag.h" #include "main.h" #define RTE_LOGTYPE_L3FWD RTE_LOGTYPE_USER1 diff --git a/examples/ipv4_frag/rte_ipv4_frag.h b/examples/ipv4_frag/rte_ipv4_frag.h deleted file mode 100644 index a96a439646..0000000000 --- a/examples/ipv4_frag/rte_ipv4_frag.h +++ /dev/null @@ -1,251 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel 
Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __INCLUDE_RTE_IPV4_FRAG_H__ -#define __INCLUDE_RTE_IPV4_FRAG_H__ -#include - -/** - * @file - * RTE IPv4 Fragmentation - * - * Implementation of IPv4 fragmentation. - * - */ - -/* - * Default byte size for the IPv4 Maximum Transfer Unit (MTU). - * This value includes the size of IPv4 header. - */ -#define IPV4_MTU_DEFAULT ETHER_MTU - -/* - * Default payload in bytes for the IPv4 packet. 
- */ -#define IPV4_DEFAULT_PAYLOAD (IPV4_MTU_DEFAULT - sizeof(struct ipv4_hdr)) - -/* - * MAX number of fragments per packet allowed. - */ -#define IPV4_MAX_FRAGS_PER_PACKET 0x80 - - -/* Debug on/off */ -#ifdef RTE_IPV4_FRAG_DEBUG - -#define RTE_IPV4_FRAG_ASSERT(exp) \ -if (!(exp)) { \ - rte_panic("function %s, line%d\tassert \"" #exp "\" failed\n", \ - __func__, __LINE__); \ -} - -#else /*RTE_IPV4_FRAG_DEBUG*/ - -#define RTE_IPV4_FRAG_ASSERT(exp) do { } while(0) - -#endif /*RTE_IPV4_FRAG_DEBUG*/ - -/* Fragment Offset */ -#define IPV4_HDR_DF_SHIFT 14 -#define IPV4_HDR_MF_SHIFT 13 -#define IPV4_HDR_FO_SHIFT 3 - -#define IPV4_HDR_DF_MASK (1 << IPV4_HDR_DF_SHIFT) -#define IPV4_HDR_MF_MASK (1 << IPV4_HDR_MF_SHIFT) - -#define IPV4_HDR_FO_MASK ((1 << IPV4_HDR_FO_SHIFT) - 1) - -static inline void __fill_ipv4hdr_frag(struct ipv4_hdr *dst, - const struct ipv4_hdr *src, uint16_t len, uint16_t fofs, - uint16_t dofs, uint32_t mf) -{ - rte_memcpy(dst, src, sizeof(*dst)); - fofs = (uint16_t)(fofs + (dofs >> IPV4_HDR_FO_SHIFT)); - fofs = (uint16_t)(fofs | mf << IPV4_HDR_MF_SHIFT); - dst->fragment_offset = rte_cpu_to_be_16(fofs); - dst->total_length = rte_cpu_to_be_16(len); - dst->hdr_checksum = 0; -} - -static inline void __free_fragments(struct rte_mbuf *mb[], uint32_t num) -{ - uint32_t i; - for (i = 0; i != num; i++) - rte_pktmbuf_free(mb[i]); -} - -/** - * IPv4 fragmentation. - * - * This function implements the fragmentation of IPv4 packets. - * - * @param pkt_in - * The input packet. - * @param pkts_out - * Array storing the output fragments. - * @param mtu_size - * Size in bytes of the Maximum Transfer Unit (MTU) for the outgoing IPv4 - * datagrams. This value includes the size of the IPv4 header. - * @param pool_direct - * MBUF pool used for allocating direct buffers for the output fragments. - * @param pool_indirect - * MBUF pool used for allocating indirect buffers for the output fragments. 
- * @return - * Upon successful completion - number of output fragments placed - * in the pkts_out array. - * Otherwise - (-1) * . - */ -static inline int32_t rte_ipv4_fragmentation(struct rte_mbuf *pkt_in, - struct rte_mbuf **pkts_out, - uint16_t nb_pkts_out, - uint16_t mtu_size, - struct rte_mempool *pool_direct, - struct rte_mempool *pool_indirect) -{ - struct rte_mbuf *in_seg = NULL; - struct ipv4_hdr *in_hdr; - uint32_t out_pkt_pos, in_seg_data_pos; - uint32_t more_in_segs; - uint16_t fragment_offset, flag_offset, frag_size; - - frag_size = (uint16_t)(mtu_size - sizeof(struct ipv4_hdr)); - - /* Fragment size should be a multiply of 8. */ - RTE_IPV4_FRAG_ASSERT((frag_size & IPV4_HDR_FO_MASK) == 0); - - /* Fragment size should be a multiply of 8. */ - RTE_IPV4_FRAG_ASSERT(IPV4_MAX_FRAGS_PER_PACKET * frag_size >= - (uint16_t)(pkt_in->pkt.pkt_len - sizeof (struct ipv4_hdr))); - - in_hdr = (struct ipv4_hdr*) pkt_in->pkt.data; - flag_offset = rte_cpu_to_be_16(in_hdr->fragment_offset); - - /* If Don't Fragment flag is set */ - if (unlikely ((flag_offset & IPV4_HDR_DF_MASK) != 0)) - return (-ENOTSUP); - - /* Check that pkts_out is big enough to hold all fragments */ - if (unlikely (frag_size * nb_pkts_out < - (uint16_t)(pkt_in->pkt.pkt_len - sizeof (struct ipv4_hdr)))) - return (-EINVAL); - - in_seg = pkt_in; - in_seg_data_pos = sizeof(struct ipv4_hdr); - out_pkt_pos = 0; - fragment_offset = 0; - - more_in_segs = 1; - while (likely(more_in_segs)) { - struct rte_mbuf *out_pkt = NULL, *out_seg_prev = NULL; - uint32_t more_out_segs; - struct ipv4_hdr *out_hdr; - - /* Allocate direct buffer */ - out_pkt = rte_pktmbuf_alloc(pool_direct); - if (unlikely(out_pkt == NULL)) { - __free_fragments(pkts_out, out_pkt_pos); - return (-ENOMEM); - } - - /* Reserve space for the IP header that will be built later */ - out_pkt->pkt.data_len = sizeof(struct ipv4_hdr); - out_pkt->pkt.pkt_len = sizeof(struct ipv4_hdr); - - out_seg_prev = out_pkt; - more_out_segs = 1; - while 
(likely(more_out_segs && more_in_segs)) { - struct rte_mbuf *out_seg = NULL; - uint32_t len; - - /* Allocate indirect buffer */ - out_seg = rte_pktmbuf_alloc(pool_indirect); - if (unlikely(out_seg == NULL)) { - rte_pktmbuf_free(out_pkt); - __free_fragments(pkts_out, out_pkt_pos); - return (-ENOMEM); - } - out_seg_prev->pkt.next = out_seg; - out_seg_prev = out_seg; - - /* Prepare indirect buffer */ - rte_pktmbuf_attach(out_seg, in_seg); - len = mtu_size - out_pkt->pkt.pkt_len; - if (len > (in_seg->pkt.data_len - in_seg_data_pos)) { - len = in_seg->pkt.data_len - in_seg_data_pos; - } - out_seg->pkt.data = (char*) in_seg->pkt.data + (uint16_t)in_seg_data_pos; - out_seg->pkt.data_len = (uint16_t)len; - out_pkt->pkt.pkt_len = (uint16_t)(len + - out_pkt->pkt.pkt_len); - out_pkt->pkt.nb_segs += 1; - in_seg_data_pos += len; - - /* Current output packet (i.e. fragment) done ? */ - if (unlikely(out_pkt->pkt.pkt_len >= mtu_size)) { - more_out_segs = 0; - } - - /* Current input segment done ? */ - if (unlikely(in_seg_data_pos == in_seg->pkt.data_len)) { - in_seg = in_seg->pkt.next; - in_seg_data_pos = 0; - - if (unlikely(in_seg == NULL)) { - more_in_segs = 0; - } - } - } - - /* Build the IP header */ - - out_hdr = (struct ipv4_hdr*) out_pkt->pkt.data; - - __fill_ipv4hdr_frag(out_hdr, in_hdr, - (uint16_t)out_pkt->pkt.pkt_len, - flag_offset, fragment_offset, more_in_segs); - - fragment_offset = (uint16_t)(fragment_offset + - out_pkt->pkt.pkt_len - sizeof(struct ipv4_hdr)); - - out_pkt->ol_flags |= PKT_TX_IP_CKSUM; - out_pkt->pkt.vlan_macip.f.l3_len = sizeof(struct ipv4_hdr); - - /* Write the fragment to the output list */ - pkts_out[out_pkt_pos] = out_pkt; - out_pkt_pos ++; - } - - return (out_pkt_pos); -} - -#endif diff --git a/lib/Makefile b/lib/Makefile index a9f94b4340..f183f36936 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -51,6 +51,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_HASH) += librte_hash DIRS-$(CONFIG_RTE_LIBRTE_LPM) += librte_lpm DIRS-$(CONFIG_RTE_LIBRTE_ACL) += librte_acl 
DIRS-$(CONFIG_RTE_LIBRTE_NET) += librte_net +DIRS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += librte_ip_frag DIRS-$(CONFIG_RTE_LIBRTE_POWER) += librte_power DIRS-$(CONFIG_RTE_LIBRTE_METER) += librte_meter DIRS-$(CONFIG_RTE_LIBRTE_SCHED) += librte_sched diff --git a/lib/librte_ip_frag/Makefile b/lib/librte_ip_frag/Makefile new file mode 100644 index 0000000000..4c353da228 --- /dev/null +++ b/lib/librte_ip_frag/Makefile @@ -0,0 +1,42 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +# install this header file +SYMLINK-$(CONFIG_RTE_LIBRTE_IP_FRAG)-include += rte_ip_frag.h +SYMLINK-$(CONFIG_RTE_LIBRTE_IP_FRAG)-include += ipv4_frag_tbl.h +SYMLINK-$(CONFIG_RTE_LIBRTE_IP_FRAG)-include += rte_ipv4_rsmbl.h + +# this library depends on rte_ether +DEPDIRS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += lib/librte_mempool lib/librte_ether + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_ip_frag/ipv4_frag_tbl.h b/lib/librte_ip_frag/ipv4_frag_tbl.h new file mode 100644 index 0000000000..c9eb50205d --- /dev/null +++ b/lib/librte_ip_frag/ipv4_frag_tbl.h @@ -0,0 +1,400 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _IPV4_FRAG_TBL_H_ +#define _IPV4_FRAG_TBL_H_ + +/** + * @file + * IPv4 fragments table. + * + * Implementation of IPv4 fragment table create/destroy/find/update. + * + */ + +/* + * The ipv4_frag_tbl is a simple hash table: + * The basic idea is to use two hash functions and + * associativity. This provides 2 * possible locations in + * the hash table for each key. Sort of simplified Cuckoo hashing, + * when the collision occurs and all 2 * are occupied, + * instead of reinserting existing keys into alternative locations, we just + * return a failure. + * Another thing timing: entries that reside in the table longer than + * are considered as invalid, and could be removed/replaced + * by the new ones. + * pair is stored together, all add/update/lookup operations are not + * MT safe. + */ + +#include +#ifdef RTE_MACHINE_CPUFLAG_SSE4_2 +#include +#endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */ + +#define PRIME_VALUE 0xeaad8405 + +TAILQ_HEAD(ipv4_pkt_list, ipv4_frag_pkt); + +struct ipv4_frag_tbl_stat { + uint64_t find_num; /* total # of find/insert attempts. */ + uint64_t add_num; /* # of add ops. */ + uint64_t del_num; /* # of del ops. 
*/ + uint64_t reuse_num; /* # of reuse (del/add) ops. */ + uint64_t fail_total; /* total # of add failures. */ + uint64_t fail_nospace; /* # of 'no space' add failures. */ +} __rte_cache_aligned; + +struct ipv4_frag_tbl { + uint64_t max_cycles; /* ttl for table entries. */ + uint32_t entry_mask; /* hash value mask. */ + uint32_t max_entries; /* max entries allowed. */ + uint32_t use_entries; /* entries in use. */ + uint32_t bucket_entries; /* hash associativity. */ + uint32_t nb_entries; /* total size of the table. */ + uint32_t nb_buckets; /* num of associativity lines. */ + struct ipv4_frag_pkt *last; /* last used entry. */ + struct ipv4_pkt_list lru; /* LRU list for table entries. */ + struct ipv4_frag_tbl_stat stat; /* statistics counters. */ + struct ipv4_frag_pkt pkt[0]; /* hash table. */ +}; + +#define IPV4_FRAG_TBL_POS(tbl, sig) \ + ((tbl)->pkt + ((sig) & (tbl)->entry_mask)) + +#define IPV4_FRAG_HASH_FNUM 2 + +#ifdef IPV4_FRAG_TBL_STAT +#define IPV4_FRAG_TBL_STAT_UPDATE(s, f, v) ((s)->f += (v)) +#else +#define IPV4_FRAG_TBL_STAT_UPDATE(s, f, v) do {} while (0) +#endif /* IPV4_FRAG_TBL_STAT */ + +static inline void +ipv4_frag_hash(const struct ipv4_frag_key *key, uint32_t *v1, uint32_t *v2) +{ + uint32_t v; + const uint32_t *p; + + p = (const uint32_t *)&key->src_dst; + +#ifdef RTE_MACHINE_CPUFLAG_SSE4_2 + v = rte_hash_crc_4byte(p[0], PRIME_VALUE); + v = rte_hash_crc_4byte(p[1], v); + v = rte_hash_crc_4byte(key->id, v); +#else + + v = rte_jhash_3words(p[0], p[1], key->id, PRIME_VALUE); +#endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */ + + *v1 = v; + *v2 = (v << 7) + (v >> 14); +} + +/* + * Update the table, after we finish processing its entry. + */ +static inline void +ipv4_frag_inuse(struct ipv4_frag_tbl *tbl, const struct ipv4_frag_pkt *fp) +{ + if (IPV4_FRAG_KEY_EMPTY(&fp->key)) { + TAILQ_REMOVE(&tbl->lru, fp, lru); + tbl->use_entries--; + } +} + +/* + * For the given key, try to find an existing entry. 
+ * If such entry doesn't exist, will return free and/or timed-out entry, + * that can be used for that key. + */ +static inline struct ipv4_frag_pkt * +ipv4_frag_lookup(struct ipv4_frag_tbl *tbl, + const struct ipv4_frag_key *key, uint64_t tms, + struct ipv4_frag_pkt **free, struct ipv4_frag_pkt **stale) +{ + struct ipv4_frag_pkt *p1, *p2; + struct ipv4_frag_pkt *empty, *old; + uint64_t max_cycles; + uint32_t i, assoc, sig1, sig2; + + empty = NULL; + old = NULL; + + max_cycles = tbl->max_cycles; + assoc = tbl->bucket_entries; + + if (tbl->last != NULL && IPV4_FRAG_KEY_CMP(&tbl->last->key, key) == 0) + return (tbl->last); + + ipv4_frag_hash(key, &sig1, &sig2); + p1 = IPV4_FRAG_TBL_POS(tbl, sig1); + p2 = IPV4_FRAG_TBL_POS(tbl, sig2); + + for (i = 0; i != assoc; i++) { + + IPV4_FRAG_LOG(DEBUG, "%s:%d:\n" + "tbl: %p, max_entries: %u, use_entries: %u\n" + "ipv4_frag_pkt line0: %p, index: %u from %u\n" + "key: <%" PRIx64 ", %#x>, start: %" PRIu64 "\n", + __func__, __LINE__, + tbl, tbl->max_entries, tbl->use_entries, + p1, i, assoc, + p1[i].key.src_dst, p1[i].key.id, p1[i].start); + + if (IPV4_FRAG_KEY_CMP(&p1[i].key, key) == 0) + return (p1 + i); + else if (IPV4_FRAG_KEY_EMPTY(&p1[i].key)) + empty = (empty == NULL) ? (p1 + i) : empty; + else if (max_cycles + p1[i].start < tms) + old = (old == NULL) ? (p1 + i) : old; + + IPV4_FRAG_LOG(DEBUG, "%s:%d:\n" + "tbl: %p, max_entries: %u, use_entries: %u\n" + "ipv4_frag_pkt line1: %p, index: %u from %u\n" + "key: <%" PRIx64 ", %#x>, start: %" PRIu64 "\n", + __func__, __LINE__, + tbl, tbl->max_entries, tbl->use_entries, + p2, i, assoc, + p2[i].key.src_dst, p2[i].key.id, p2[i].start); + + if (IPV4_FRAG_KEY_CMP(&p2[i].key, key) == 0) + return (p2 + i); + else if (IPV4_FRAG_KEY_EMPTY(&p2[i].key)) + empty = (empty == NULL) ?( p2 + i) : empty; + else if (max_cycles + p2[i].start < tms) + old = (old == NULL) ? 
(p2 + i) : old; + } + + *free = empty; + *stale = old; + return (NULL); +} + +static inline void +ipv4_frag_tbl_del(struct ipv4_frag_tbl *tbl, struct ipv4_frag_death_row *dr, + struct ipv4_frag_pkt *fp) +{ + ipv4_frag_free(fp, dr); + IPV4_FRAG_KEY_INVALIDATE(&fp->key); + TAILQ_REMOVE(&tbl->lru, fp, lru); + tbl->use_entries--; + IPV4_FRAG_TBL_STAT_UPDATE(&tbl->stat, del_num, 1); +} + +static inline void +ipv4_frag_tbl_add(struct ipv4_frag_tbl *tbl, struct ipv4_frag_pkt *fp, + const struct ipv4_frag_key *key, uint64_t tms) +{ + fp->key = key[0]; + ipv4_frag_reset(fp, tms); + TAILQ_INSERT_TAIL(&tbl->lru, fp, lru); + tbl->use_entries++; + IPV4_FRAG_TBL_STAT_UPDATE(&tbl->stat, add_num, 1); +} + +static inline void +ipv4_frag_tbl_reuse(struct ipv4_frag_tbl *tbl, struct ipv4_frag_death_row *dr, + struct ipv4_frag_pkt *fp, uint64_t tms) +{ + ipv4_frag_free(fp, dr); + ipv4_frag_reset(fp, tms); + TAILQ_REMOVE(&tbl->lru, fp, lru); + TAILQ_INSERT_TAIL(&tbl->lru, fp, lru); + IPV4_FRAG_TBL_STAT_UPDATE(&tbl->stat, reuse_num, 1); +} + +/* + * Find an entry in the table for the corresponding fragment. + * If such entry is not present, then allocate a new one. + * If the entry is stale, then free and reuse it. + */ +static inline struct ipv4_frag_pkt * +ipv4_frag_find(struct ipv4_frag_tbl *tbl, struct ipv4_frag_death_row *dr, + const struct ipv4_frag_key *key, uint64_t tms) +{ + struct ipv4_frag_pkt *pkt, *free, *stale, *lru; + uint64_t max_cycles; + + /* + * Actually the two line below are totally redundant. + * they are here, just to make gcc 4.6 happy. + */ + free = NULL; + stale = NULL; + max_cycles = tbl->max_cycles; + + IPV4_FRAG_TBL_STAT_UPDATE(&tbl->stat, find_num, 1); + + if ((pkt = ipv4_frag_lookup(tbl, key, tms, &free, &stale)) == NULL) { + + /*timed-out entry, free and invalidate it*/ + if (stale != NULL) { + ipv4_frag_tbl_del(tbl, dr, stale); + free = stale; + + /* + * we found a free entry, check if we can use it. 
+ * If we run out of free entries in the table, then + * check if we have a timed out entry to delete. + */ + } else if (free != NULL && + tbl->max_entries <= tbl->use_entries) { + lru = TAILQ_FIRST(&tbl->lru); + if (max_cycles + lru->start < tms) { + ipv4_frag_tbl_del(tbl, dr, lru); + } else { + free = NULL; + IPV4_FRAG_TBL_STAT_UPDATE(&tbl->stat, + fail_nospace, 1); + } + } + + /* found a free entry to reuse. */ + if (free != NULL) { + ipv4_frag_tbl_add(tbl, free, key, tms); + pkt = free; + } + + /* + * we found the flow, but it is already timed out, + * so free associated resources, reposition it in the LRU list, + * and reuse it. + */ + } else if (max_cycles + pkt->start < tms) { + ipv4_frag_tbl_reuse(tbl, dr, pkt, tms); + } + + IPV4_FRAG_TBL_STAT_UPDATE(&tbl->stat, fail_total, (pkt == NULL)); + + tbl->last = pkt; + return (pkt); +} + +/* + * Create a new IPV4 Frag table. + * @param bucket_num + * Number of buckets in the hash table. + * @param bucket_entries + * Number of entries per bucket (e.g. hash associativity). + * Should be power of two. + * @param max_entries + * Maximum number of entries that could be stored in the table. + * The value should be less or equal then bucket_num * bucket_entries. + * @param max_cycles + * Maximum TTL in cycles for each fragmented packet. + * @param socket_id + * The *socket_id* argument is the socket identifier in the case of + * NUMA. The value can be *SOCKET_ID_ANY* if there is no NUMA constraints. + * @return + * The pointer to the new allocated mempool, on success. NULL on error. + */ +static struct ipv4_frag_tbl * +ipv4_frag_tbl_create(uint32_t bucket_num, uint32_t bucket_entries, + uint32_t max_entries, uint64_t max_cycles, int socket_id) +{ + struct ipv4_frag_tbl *tbl; + size_t sz; + uint64_t nb_entries; + + nb_entries = rte_align32pow2(bucket_num); + nb_entries *= bucket_entries; + nb_entries *= IPV4_FRAG_HASH_FNUM; + + /* check input parameters. 
*/ + if (rte_is_power_of_2(bucket_entries) == 0 || + nb_entries > UINT32_MAX || nb_entries == 0 || + nb_entries < max_entries) { + RTE_LOG(ERR, USER1, "%s: invalid input parameter\n", __func__); + return (NULL); + } + + sz = sizeof (*tbl) + nb_entries * sizeof (tbl->pkt[0]); + if ((tbl = rte_zmalloc_socket(__func__, sz, CACHE_LINE_SIZE, + socket_id)) == NULL) { + RTE_LOG(ERR, USER1, + "%s: allocation of %zu bytes at socket %d failed do\n", + __func__, sz, socket_id); + return (NULL); + } + + RTE_LOG(INFO, USER1, "%s: allocated of %zu bytes at socket %d\n", + __func__, sz, socket_id); + + tbl->max_cycles = max_cycles; + tbl->max_entries = max_entries; + tbl->nb_entries = (uint32_t)nb_entries; + tbl->nb_buckets = bucket_num; + tbl->bucket_entries = bucket_entries; + tbl->entry_mask = (tbl->nb_entries - 1) & ~(tbl->bucket_entries - 1); + + TAILQ_INIT(&(tbl->lru)); + return (tbl); +} + +static inline void +ipv4_frag_tbl_destroy( struct ipv4_frag_tbl *tbl) +{ + rte_free(tbl); +} + +static void +ipv4_frag_tbl_dump_stat(FILE *f, const struct ipv4_frag_tbl *tbl) +{ + uint64_t fail_total, fail_nospace; + + fail_total = tbl->stat.fail_total; + fail_nospace = tbl->stat.fail_nospace; + + fprintf(f, "max entries:\t%u;\n" + "entries in use:\t%u;\n" + "finds/inserts:\t%" PRIu64 ";\n" + "entries added:\t%" PRIu64 ";\n" + "entries deleted by timeout:\t%" PRIu64 ";\n" + "entries reused by timeout:\t%" PRIu64 ";\n" + "total add failures:\t%" PRIu64 ";\n" + "add no-space failures:\t%" PRIu64 ";\n" + "add hash-collisions failures:\t%" PRIu64 ";\n", + tbl->max_entries, + tbl->use_entries, + tbl->stat.find_num, + tbl->stat.add_num, + tbl->stat.del_num, + tbl->stat.reuse_num, + fail_total, + fail_nospace, + fail_total - fail_nospace); +} + + +#endif /* _IPV4_FRAG_TBL_H_ */ diff --git a/lib/librte_ip_frag/rte_ip_frag.h b/lib/librte_ip_frag/rte_ip_frag.h new file mode 100644 index 0000000000..a96a439646 --- /dev/null +++ b/lib/librte_ip_frag/rte_ip_frag.h @@ -0,0 +1,251 @@ +/*- + * BSD 
LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_RTE_IPV4_FRAG_H__ +#define __INCLUDE_RTE_IPV4_FRAG_H__ +#include + +/** + * @file + * RTE IPv4 Fragmentation + * + * Implementation of IPv4 fragmentation. + * + */ + +/* + * Default byte size for the IPv4 Maximum Transfer Unit (MTU). + * This value includes the size of IPv4 header. 
+ */ +#define IPV4_MTU_DEFAULT ETHER_MTU + +/* + * Default payload in bytes for the IPv4 packet. + */ +#define IPV4_DEFAULT_PAYLOAD (IPV4_MTU_DEFAULT - sizeof(struct ipv4_hdr)) + +/* + * MAX number of fragments per packet allowed. + */ +#define IPV4_MAX_FRAGS_PER_PACKET 0x80 + + +/* Debug on/off */ +#ifdef RTE_IPV4_FRAG_DEBUG + +#define RTE_IPV4_FRAG_ASSERT(exp) \ +if (!(exp)) { \ + rte_panic("function %s, line%d\tassert \"" #exp "\" failed\n", \ + __func__, __LINE__); \ +} + +#else /*RTE_IPV4_FRAG_DEBUG*/ + +#define RTE_IPV4_FRAG_ASSERT(exp) do { } while(0) + +#endif /*RTE_IPV4_FRAG_DEBUG*/ + +/* Fragment Offset */ +#define IPV4_HDR_DF_SHIFT 14 +#define IPV4_HDR_MF_SHIFT 13 +#define IPV4_HDR_FO_SHIFT 3 + +#define IPV4_HDR_DF_MASK (1 << IPV4_HDR_DF_SHIFT) +#define IPV4_HDR_MF_MASK (1 << IPV4_HDR_MF_SHIFT) + +#define IPV4_HDR_FO_MASK ((1 << IPV4_HDR_FO_SHIFT) - 1) + +static inline void __fill_ipv4hdr_frag(struct ipv4_hdr *dst, + const struct ipv4_hdr *src, uint16_t len, uint16_t fofs, + uint16_t dofs, uint32_t mf) +{ + rte_memcpy(dst, src, sizeof(*dst)); + fofs = (uint16_t)(fofs + (dofs >> IPV4_HDR_FO_SHIFT)); + fofs = (uint16_t)(fofs | mf << IPV4_HDR_MF_SHIFT); + dst->fragment_offset = rte_cpu_to_be_16(fofs); + dst->total_length = rte_cpu_to_be_16(len); + dst->hdr_checksum = 0; +} + +static inline void __free_fragments(struct rte_mbuf *mb[], uint32_t num) +{ + uint32_t i; + for (i = 0; i != num; i++) + rte_pktmbuf_free(mb[i]); +} + +/** + * IPv4 fragmentation. + * + * This function implements the fragmentation of IPv4 packets. + * + * @param pkt_in + * The input packet. + * @param pkts_out + * Array storing the output fragments. + * @param mtu_size + * Size in bytes of the Maximum Transfer Unit (MTU) for the outgoing IPv4 + * datagrams. This value includes the size of the IPv4 header. + * @param pool_direct + * MBUF pool used for allocating direct buffers for the output fragments. 
+ * @param pool_indirect + * MBUF pool used for allocating indirect buffers for the output fragments. + * @return + * Upon successful completion - number of output fragments placed + * in the pkts_out array. + * Otherwise - (-1) * . + */ +static inline int32_t rte_ipv4_fragmentation(struct rte_mbuf *pkt_in, + struct rte_mbuf **pkts_out, + uint16_t nb_pkts_out, + uint16_t mtu_size, + struct rte_mempool *pool_direct, + struct rte_mempool *pool_indirect) +{ + struct rte_mbuf *in_seg = NULL; + struct ipv4_hdr *in_hdr; + uint32_t out_pkt_pos, in_seg_data_pos; + uint32_t more_in_segs; + uint16_t fragment_offset, flag_offset, frag_size; + + frag_size = (uint16_t)(mtu_size - sizeof(struct ipv4_hdr)); + + /* Fragment size should be a multiply of 8. */ + RTE_IPV4_FRAG_ASSERT((frag_size & IPV4_HDR_FO_MASK) == 0); + + /* Fragment size should be a multiply of 8. */ + RTE_IPV4_FRAG_ASSERT(IPV4_MAX_FRAGS_PER_PACKET * frag_size >= + (uint16_t)(pkt_in->pkt.pkt_len - sizeof (struct ipv4_hdr))); + + in_hdr = (struct ipv4_hdr*) pkt_in->pkt.data; + flag_offset = rte_cpu_to_be_16(in_hdr->fragment_offset); + + /* If Don't Fragment flag is set */ + if (unlikely ((flag_offset & IPV4_HDR_DF_MASK) != 0)) + return (-ENOTSUP); + + /* Check that pkts_out is big enough to hold all fragments */ + if (unlikely (frag_size * nb_pkts_out < + (uint16_t)(pkt_in->pkt.pkt_len - sizeof (struct ipv4_hdr)))) + return (-EINVAL); + + in_seg = pkt_in; + in_seg_data_pos = sizeof(struct ipv4_hdr); + out_pkt_pos = 0; + fragment_offset = 0; + + more_in_segs = 1; + while (likely(more_in_segs)) { + struct rte_mbuf *out_pkt = NULL, *out_seg_prev = NULL; + uint32_t more_out_segs; + struct ipv4_hdr *out_hdr; + + /* Allocate direct buffer */ + out_pkt = rte_pktmbuf_alloc(pool_direct); + if (unlikely(out_pkt == NULL)) { + __free_fragments(pkts_out, out_pkt_pos); + return (-ENOMEM); + } + + /* Reserve space for the IP header that will be built later */ + out_pkt->pkt.data_len = sizeof(struct ipv4_hdr); + 
out_pkt->pkt.pkt_len = sizeof(struct ipv4_hdr); + + out_seg_prev = out_pkt; + more_out_segs = 1; + while (likely(more_out_segs && more_in_segs)) { + struct rte_mbuf *out_seg = NULL; + uint32_t len; + + /* Allocate indirect buffer */ + out_seg = rte_pktmbuf_alloc(pool_indirect); + if (unlikely(out_seg == NULL)) { + rte_pktmbuf_free(out_pkt); + __free_fragments(pkts_out, out_pkt_pos); + return (-ENOMEM); + } + out_seg_prev->pkt.next = out_seg; + out_seg_prev = out_seg; + + /* Prepare indirect buffer */ + rte_pktmbuf_attach(out_seg, in_seg); + len = mtu_size - out_pkt->pkt.pkt_len; + if (len > (in_seg->pkt.data_len - in_seg_data_pos)) { + len = in_seg->pkt.data_len - in_seg_data_pos; + } + out_seg->pkt.data = (char*) in_seg->pkt.data + (uint16_t)in_seg_data_pos; + out_seg->pkt.data_len = (uint16_t)len; + out_pkt->pkt.pkt_len = (uint16_t)(len + + out_pkt->pkt.pkt_len); + out_pkt->pkt.nb_segs += 1; + in_seg_data_pos += len; + + /* Current output packet (i.e. fragment) done ? */ + if (unlikely(out_pkt->pkt.pkt_len >= mtu_size)) { + more_out_segs = 0; + } + + /* Current input segment done ? 
*/ + if (unlikely(in_seg_data_pos == in_seg->pkt.data_len)) { + in_seg = in_seg->pkt.next; + in_seg_data_pos = 0; + + if (unlikely(in_seg == NULL)) { + more_in_segs = 0; + } + } + } + + /* Build the IP header */ + + out_hdr = (struct ipv4_hdr*) out_pkt->pkt.data; + + __fill_ipv4hdr_frag(out_hdr, in_hdr, + (uint16_t)out_pkt->pkt.pkt_len, + flag_offset, fragment_offset, more_in_segs); + + fragment_offset = (uint16_t)(fragment_offset + + out_pkt->pkt.pkt_len - sizeof(struct ipv4_hdr)); + + out_pkt->ol_flags |= PKT_TX_IP_CKSUM; + out_pkt->pkt.vlan_macip.f.l3_len = sizeof(struct ipv4_hdr); + + /* Write the fragment to the output list */ + pkts_out[out_pkt_pos] = out_pkt; + out_pkt_pos ++; + } + + return (out_pkt_pos); +} + +#endif diff --git a/lib/librte_ip_frag/rte_ipv4_rsmbl.h b/lib/librte_ip_frag/rte_ipv4_rsmbl.h new file mode 100644 index 0000000000..61a70ad5b2 --- /dev/null +++ b/lib/librte_ip_frag/rte_ipv4_rsmbl.h @@ -0,0 +1,425 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _IPV4_RSMBL_H_ +#define _IPV4_RSMBL_H_ + +/** + * @file + * IPv4 reassemble + * + * Implementation of IPv4 reassemble. + * + */ + +enum { + LAST_FRAG_IDX, + FIRST_FRAG_IDX, + MIN_FRAG_NUM, + MAX_FRAG_NUM = 4, +}; + +struct ipv4_frag { + uint16_t ofs; + uint16_t len; + struct rte_mbuf *mb; +}; + +/* + * Used to uniquely identify fragmented datagram. + */ +struct ipv4_frag_key { + uint64_t src_dst; + uint32_t id; +}; + +#define IPV4_FRAG_KEY_INVALIDATE(k) ((k)->src_dst = 0) +#define IPV4_FRAG_KEY_EMPTY(k) ((k)->src_dst == 0) + +#define IPV4_FRAG_KEY_CMP(k1, k2) \ + (((k1)->src_dst ^ (k2)->src_dst) | ((k1)->id ^ (k2)->id)) + + +/* + * Fragmented packet to reassemble. + * First two entries in the frags[] array are for the last and first fragments. 
+ */ +struct ipv4_frag_pkt { + TAILQ_ENTRY(ipv4_frag_pkt) lru; /* LRU list */ + struct ipv4_frag_key key; + uint64_t start; /* creation timestamp */ + uint32_t total_size; /* expected reassembled size */ + uint32_t frag_size; /* size of fragments received */ + uint32_t last_idx; /* index of next entry to fill */ + struct ipv4_frag frags[MAX_FRAG_NUM]; +} __rte_cache_aligned; + + +struct ipv4_frag_death_row { + uint32_t cnt; + struct rte_mbuf *row[MAX_PKT_BURST * (MAX_FRAG_NUM + 1)]; +}; + +#define IPV4_FRAG_MBUF2DR(dr, mb) ((dr)->row[(dr)->cnt++] = (mb)) + +/* logging macros. */ + +#ifdef IPV4_FRAG_DEBUG +#define IPV4_FRAG_LOG(lvl, fmt, args...) RTE_LOG(lvl, USER1, fmt, ##args) +#else +#define IPV4_FRAG_LOG(lvl, fmt, args...) do {} while(0) +#endif /* IPV4_FRAG_DEBUG */ + + +static inline void +ipv4_frag_reset(struct ipv4_frag_pkt *fp, uint64_t tms) +{ + static const struct ipv4_frag zero_frag = { + .ofs = 0, + .len = 0, + .mb = NULL, + }; + + fp->start = tms; + fp->total_size = UINT32_MAX; + fp->frag_size = 0; + fp->last_idx = MIN_FRAG_NUM; + fp->frags[LAST_FRAG_IDX] = zero_frag; + fp->frags[FIRST_FRAG_IDX] = zero_frag; +} + +static inline void +ipv4_frag_free(struct ipv4_frag_pkt *fp, struct ipv4_frag_death_row *dr) +{ + uint32_t i, k; + + k = dr->cnt; + for (i = 0; i != fp->last_idx; i++) { + if (fp->frags[i].mb != NULL) { + dr->row[k++] = fp->frags[i].mb; + fp->frags[i].mb = NULL; + } + } + + fp->last_idx = 0; + dr->cnt = k; +} + +static inline void +ipv4_frag_free_death_row(struct ipv4_frag_death_row *dr, uint32_t prefetch) +{ + uint32_t i, k, n; + + k = RTE_MIN(prefetch, dr->cnt); + n = dr->cnt; + + for (i = 0; i != k; i++) + rte_prefetch0(dr->row[i]); + + for (i = 0; i != n - k; i++) { + rte_prefetch0(dr->row[i + k]); + rte_pktmbuf_free(dr->row[i]); + } + + for (; i != n; i++) + rte_pktmbuf_free(dr->row[i]); + + dr->cnt = 0; +} + +/* + * Helper function. + * Takes 2 mbufs that represents two framents of the same packet and + * chains them into one mbuf. 
 */
static inline void
ipv4_frag_chain(struct rte_mbuf *mn, struct rte_mbuf *mp)
{
	struct rte_mbuf *ms;

	/* adjust start of the last fragment data: strip the L2/L3 headers of
	 * <mp> so only its payload appears in the reassembled packet. */
	rte_pktmbuf_adj(mp, (uint16_t)(mp->pkt.vlan_macip.f.l2_len +
		mp->pkt.vlan_macip.f.l3_len));

	/* chain two fragments: link <mp> after the last segment of <mn>. */
	ms = rte_pktmbuf_lastseg(mn);
	ms->pkt.next = mp;

	/* accumulate number of segments and total length. */
	mn->pkt.nb_segs = (uint8_t)(mn->pkt.nb_segs + mp->pkt.nb_segs);
	mn->pkt.pkt_len += mp->pkt.pkt_len;

	/* reset pkt_len and nb_segs for the chained fragment, since it is no
	 * longer the head of a packet. */
	mp->pkt.pkt_len = mp->pkt.data_len;
	mp->pkt.nb_segs = 1;
}

/*
 * Reassemble fragments into one packet.
 * Walks backwards from the last fragment, repeatedly chaining in front the
 * fragment that ends exactly at the current chain's start offset, until the
 * chain reaches the end of the first fragment; then prepends the first
 * fragment (which keeps its L3 header) and patches the IPv4 header.
 * Returns NULL if a hole is found (no fragment extends the chain).
 */
static inline struct rte_mbuf *
ipv4_frag_reassemble(const struct ipv4_frag_pkt *fp)
{
	struct ipv4_hdr *ip_hdr;
	struct rte_mbuf *m, *prev;
	uint32_t i, n, ofs, first_len;

	first_len = fp->frags[FIRST_FRAG_IDX].len;
	n = fp->last_idx - 1;

	/* start from the last fragment. */
	m = fp->frags[LAST_FRAG_IDX].mb;
	ofs = fp->frags[LAST_FRAG_IDX].ofs;

	while (ofs != first_len) {

		/* remember the chain head to detect a pass with no progress. */
		prev = m;

		for (i = n; i != FIRST_FRAG_IDX && ofs != first_len; i--) {

			/* previous fragment found: it ends where the current
			 * chain begins. */
			if(fp->frags[i].ofs + fp->frags[i].len == ofs) {

				ipv4_frag_chain(fp->frags[i].mb, m);

				/* update our last fragment and offset. */
				m = fp->frags[i].mb;
				ofs = fp->frags[i].ofs;
			}
		}

		/* error - hole in the packet: nothing was chained this pass. */
		if (m == prev) {
			return (NULL);
		}
	}

	/* chain with the first fragment. */
	ipv4_frag_chain(fp->frags[FIRST_FRAG_IDX].mb, m);
	m = fp->frags[FIRST_FRAG_IDX].mb;

	/* update mbuf fields for the reassembled packet: request IP checksum
	 * computation on TX (hdr_checksum is zeroed below). */
	m->ol_flags |= PKT_TX_IP_CKSUM;

	/* update ipv4 header for the reassembled packet */
	ip_hdr = (struct ipv4_hdr*)(rte_pktmbuf_mtod(m, uint8_t *) +
		m->pkt.vlan_macip.f.l2_len);

	/* total_length = all payload plus the single remaining L3 header;
	 * clear MF and the fragment offset, preserve only the DF bit. */
	ip_hdr->total_length = rte_cpu_to_be_16((uint16_t)(fp->total_size +
		m->pkt.vlan_macip.f.l3_len));
	ip_hdr->fragment_offset = (uint16_t)(ip_hdr->fragment_offset &
		rte_cpu_to_be_16(IPV4_HDR_DF_FLAG));
	ip_hdr->hdr_checksum = 0;

	return (m);
}

/*
 * Insert a new fragment into packet entry <fp>.
 * <ofs>/<len> are the fragment's data offset and data length (L3 header
 * excluded). On error (too many fragments, duplicate first/last fragment,
 * or failed reassembly) all collected mbufs are moved onto death row <dr>
 * and the entry's key is invalidated.
 * Returns the reassembled packet when this fragment completes it,
 * NULL otherwise (more fragments still needed, or error).
 */
static inline struct rte_mbuf *
ipv4_frag_process(struct ipv4_frag_pkt *fp, struct ipv4_frag_death_row *dr,
	struct rte_mbuf *mb, uint16_t ofs, uint16_t len, uint16_t more_frags)
{
	uint32_t idx;

	fp->frag_size += len;

	/* this is the first fragment: UINT32_MAX marks a duplicate. */
	if (ofs == 0) {
		idx = (fp->frags[FIRST_FRAG_IDX].mb == NULL) ?
			FIRST_FRAG_IDX : UINT32_MAX;

	/* this is the last fragment: it determines the total data size. */
	} else if (more_frags == 0) {
		fp->total_size = ofs + len;
		idx = (fp->frags[LAST_FRAG_IDX].mb == NULL) ?
			LAST_FRAG_IDX : UINT32_MAX;

	/* this is an intermediate fragment: take the next free slot. */
	} else if ((idx = fp->last_idx) <
		sizeof (fp->frags) / sizeof (fp->frags[0])) {
		fp->last_idx++;
	}

	/*
	 * erroneous packet: either exceeded max allowed number of fragments,
	 * or duplicate first/last fragment encountered.
	 */
	if (idx >= sizeof (fp->frags) / sizeof (fp->frags[0])) {

		/* report an error. */
		IPV4_FRAG_LOG(DEBUG, "%s:%d invalid fragmented packet:\n"
			"ipv4_frag_pkt: %p, key: <%" PRIx64 ", %#x>, "
			"total_size: %u, frag_size: %u, last_idx: %u\n"
			"first fragment: ofs: %u, len: %u\n"
			"last fragment: ofs: %u, len: %u\n\n",
			__func__, __LINE__,
			fp, fp->key.src_dst, fp->key.id,
			fp->total_size, fp->frag_size, fp->last_idx,
			fp->frags[FIRST_FRAG_IDX].ofs,
			fp->frags[FIRST_FRAG_IDX].len,
			fp->frags[LAST_FRAG_IDX].ofs,
			fp->frags[LAST_FRAG_IDX].len);

		/* free all fragments, invalidate the entry. */
		ipv4_frag_free(fp, dr);
		IPV4_FRAG_KEY_INVALIDATE(&fp->key);
		IPV4_FRAG_MBUF2DR(dr, mb);

		return (NULL);
	}

	/* store the fragment; ownership of <mb> passes to the table. */
	fp->frags[idx].ofs = ofs;
	fp->frags[idx].len = len;
	fp->frags[idx].mb = mb;

	mb = NULL;

	/* not all fragments are collected yet. */
	if (likely (fp->frag_size < fp->total_size)) {
		return (mb);

	/* if we collected all fragments, then try to reassemble. */
	} else if (fp->frag_size == fp->total_size &&
			fp->frags[FIRST_FRAG_IDX].mb != NULL) {
		mb = ipv4_frag_reassemble(fp);
	}

	/* erroneous set of fragments (frag_size overshot total_size, or
	 * reassembly found a hole). */
	if (mb == NULL) {

		/* report an error. */
		IPV4_FRAG_LOG(DEBUG, "%s:%d invalid fragmented packet:\n"
			"ipv4_frag_pkt: %p, key: <%" PRIx64 ", %#x>, "
			"total_size: %u, frag_size: %u, last_idx: %u\n"
			"first fragment: ofs: %u, len: %u\n"
			"last fragment: ofs: %u, len: %u\n\n",
			__func__, __LINE__,
			fp, fp->key.src_dst, fp->key.id,
			fp->total_size, fp->frag_size, fp->last_idx,
			fp->frags[FIRST_FRAG_IDX].ofs,
			fp->frags[FIRST_FRAG_IDX].len,
			fp->frags[LAST_FRAG_IDX].ofs,
			fp->frags[LAST_FRAG_IDX].len);

		/* free associated resources. */
		ipv4_frag_free(fp, dr);
	}

	/* we are done with that entry, invalidate it. */
	IPV4_FRAG_KEY_INVALIDATE(&fp->key);
	return (mb);
}

#include "ipv4_frag_tbl.h"

/*
 * Process new mbuf with fragment of IPV4 packet.
 * Incoming mbuf should have its l2_len/l3_len fields set up correctly.
 * @param tbl
 *   Table where to lookup/add the fragmented packet.
 * @param mb
 *   Incoming mbuf with IPV4 fragment.
 * @param tms
 *   Fragment arrival timestamp.
 * @param ip_hdr
 *   Pointer to the IPV4 header inside the fragment.
 * @param ip_ofs
 *   Fragment's offset (as extracted from the header).
 * @param ip_flag
 *   Fragment's MF flag.
 * @return
 *   Pointer to mbuf for reassembled packet, or NULL if:
 *   - an error occurred.
 *   - not all fragments of the packet are collected yet.
 */
static inline struct rte_mbuf *
ipv4_frag_mbuf(struct ipv4_frag_tbl *tbl, struct ipv4_frag_death_row *dr,
	struct rte_mbuf *mb, uint64_t tms, struct ipv4_hdr *ip_hdr,
	uint16_t ip_ofs, uint16_t ip_flag)
{
	struct ipv4_frag_pkt *fp;
	struct ipv4_frag_key key;
	const uint64_t *psd;
	uint16_t ip_len;

	/* build the lookup key: src_addr/dst_addr read as one 64-bit value,
	 * plus the IP packet id.
	 * NOTE(review): the uint64_t cast assumes adequate alignment of the
	 * address pair inside the header — inherited from original code. */
	psd = (uint64_t *)&ip_hdr->src_addr;
	key.src_dst = psd[0];
	key.id = ip_hdr->packet_id;

	/* convert offset from 8-byte units; len is the data length only. */
	ip_ofs *= IPV4_HDR_OFFSET_UNITS;
	ip_len = (uint16_t)(rte_be_to_cpu_16(ip_hdr->total_length) -
		mb->pkt.vlan_macip.f.l3_len);

	IPV4_FRAG_LOG(DEBUG, "%s:%d:\n"
		"mbuf: %p, tms: %" PRIu64
		", key: <%" PRIx64 ", %#x>, ofs: %u, len: %u, flags: %#x\n"
		"tbl: %p, max_cycles: %" PRIu64 ", entry_mask: %#x, "
		"max_entries: %u, use_entries: %u\n\n",
		__func__, __LINE__,
		mb, tms, key.src_dst, key.id, ip_ofs, ip_len, ip_flag,
		tbl, tbl->max_cycles, tbl->entry_mask, tbl->max_entries,
		tbl->use_entries);

	/* try to find/add entry into the fragment's table. */
	if ((fp = ipv4_frag_find(tbl, dr, &key, tms)) == NULL) {
		IPV4_FRAG_MBUF2DR(dr, mb);
		return (NULL);
	}

	IPV4_FRAG_LOG(DEBUG, "%s:%d:\n"
		"tbl: %p, max_entries: %u, use_entries: %u\n"
		"ipv4_frag_pkt: %p, key: <%" PRIx64 ", %#x>, start: %" PRIu64
		", total_size: %u, frag_size: %u, last_idx: %u\n\n",
		__func__, __LINE__,
		tbl, tbl->max_entries, tbl->use_entries,
		fp, fp->key.src_dst, fp->key.id, fp->start,
		fp->total_size, fp->frag_size, fp->last_idx);


	/* process the fragmented packet; mark the entry as recently used. */
	mb = ipv4_frag_process(fp, dr, mb, ip_ofs, ip_len, ip_flag);
	ipv4_frag_inuse(tbl, fp);

	IPV4_FRAG_LOG(DEBUG, "%s:%d:\n"
		"mbuf: %p\n"
		"tbl: %p, max_entries: %u, use_entries: %u\n"
		"ipv4_frag_pkt: %p, key: <%" PRIx64 ", %#x>, start: %" PRIu64
		", total_size: %u, frag_size: %u, last_idx: %u\n\n",
		__func__, __LINE__, mb,
		tbl, tbl->max_entries, tbl->use_entries,
		fp, fp->key.src_dst, fp->key.id, fp->start,
		fp->total_size, fp->frag_size, fp->last_idx);

	return (mb);
}

#endif /* _IPV4_RSMBL_H_ */