From 4c38e5532a07dd0866b02edb2410a8bb0ebbacad Mon Sep 17 00:00:00 2001 From: Anatoly Burakov Date: Wed, 28 May 2014 18:32:36 +0100 Subject: [PATCH] ip_frag: refactor IPv4 fragmentation into a proper library Signed-off-by: Anatoly Burakov [Thomas: add in doxygen] Acked-by: Thomas Monjalon --- doc/doxy-api-index.md | 1 + doc/doxy-api.conf | 1 + examples/ipv4_frag/main.c | 11 + lib/librte_ip_frag/Makefile | 9 + lib/librte_ip_frag/rte_ip_frag.h | 190 +--------------- lib/librte_ip_frag/rte_ipv4_fragmentation.c | 239 ++++++++++++++++++++ mk/rte.app.mk | 4 + 7 files changed, 272 insertions(+), 183 deletions(-) create mode 100644 lib/librte_ip_frag/rte_ipv4_fragmentation.c diff --git a/doc/doxy-api-index.md b/doc/doxy-api-index.md index 83303a166b..1b8c7f871a 100644 --- a/doc/doxy-api-index.md +++ b/doc/doxy-api-index.md @@ -78,6 +78,7 @@ There are many libraries, so their headers may be grouped by topics: [SCTP] (@ref rte_sctp.h), [TCP] (@ref rte_tcp.h), [UDP] (@ref rte_udp.h), + [frag/reass] (@ref rte_ip_frag.h), [LPM route] (@ref rte_lpm.h), [ACL] (@ref rte_acl.h) diff --git a/doc/doxy-api.conf b/doc/doxy-api.conf index e5a8520339..0af7d372f8 100644 --- a/doc/doxy-api.conf +++ b/doc/doxy-api.conf @@ -35,6 +35,7 @@ INPUT = doc/doxy-api-index.md \ lib/librte_distributor \ lib/librte_ether \ lib/librte_hash \ + lib/librte_ip_frag \ lib/librte_kni \ lib/librte_kvargs \ lib/librte_lpm \ diff --git a/examples/ipv4_frag/main.c b/examples/ipv4_frag/main.c index ff6001538d..88dc1f632a 100644 --- a/examples/ipv4_frag/main.c +++ b/examples/ipv4_frag/main.c @@ -74,6 +74,17 @@ #include "rte_ip_frag.h" #include "main.h" +/* + * Default byte size for the IPv4 Maximum Transfer Unit (MTU). + * This value includes the size of IPv4 header. + */ +#define IPV4_MTU_DEFAULT ETHER_MTU + +/* + * Default payload in bytes for the IPv4 packet. 
+ */ +#define IPV4_DEFAULT_PAYLOAD (IPV4_MTU_DEFAULT - sizeof(struct ipv4_hdr)) + #define RTE_LOGTYPE_L3FWD RTE_LOGTYPE_USER1 #define MBUF_SIZE (2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM) diff --git a/lib/librte_ip_frag/Makefile b/lib/librte_ip_frag/Makefile index 4c353da228..95e917d2e0 100644 --- a/lib/librte_ip_frag/Makefile +++ b/lib/librte_ip_frag/Makefile @@ -31,6 +31,15 @@ include $(RTE_SDK)/mk/rte.vars.mk +# library name +LIB = librte_ip_frag.a + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) + +#source files +SRCS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += rte_ipv4_fragmentation.c + # install this header file SYMLINK-$(CONFIG_RTE_LIBRTE_IP_FRAG)-include += rte_ip_frag.h SYMLINK-$(CONFIG_RTE_LIBRTE_IP_FRAG)-include += ipv4_frag_tbl.h diff --git a/lib/librte_ip_frag/rte_ip_frag.h b/lib/librte_ip_frag/rte_ip_frag.h index a96a439646..7f7e00a708 100644 --- a/lib/librte_ip_frag/rte_ip_frag.h +++ b/lib/librte_ip_frag/rte_ip_frag.h @@ -31,9 +31,8 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#ifndef __INCLUDE_RTE_IPV4_FRAG_H__ -#define __INCLUDE_RTE_IPV4_FRAG_H__ -#include +#ifndef _RTE_IP_FRAG_H__ +#define _RTE_IP_FRAG_H__ /** * @file @@ -43,67 +42,6 @@ * */ -/* - * Default byte size for the IPv4 Maximum Transfer Unit (MTU). - * This value includes the size of IPv4 header. - */ -#define IPV4_MTU_DEFAULT ETHER_MTU - -/* - * Default payload in bytes for the IPv4 packet. - */ -#define IPV4_DEFAULT_PAYLOAD (IPV4_MTU_DEFAULT - sizeof(struct ipv4_hdr)) - -/* - * MAX number of fragments per packet allowed. 
- */ -#define IPV4_MAX_FRAGS_PER_PACKET 0x80 - - -/* Debug on/off */ -#ifdef RTE_IPV4_FRAG_DEBUG - -#define RTE_IPV4_FRAG_ASSERT(exp) \ -if (!(exp)) { \ - rte_panic("function %s, line%d\tassert \"" #exp "\" failed\n", \ - __func__, __LINE__); \ -} - -#else /*RTE_IPV4_FRAG_DEBUG*/ - -#define RTE_IPV4_FRAG_ASSERT(exp) do { } while(0) - -#endif /*RTE_IPV4_FRAG_DEBUG*/ - -/* Fragment Offset */ -#define IPV4_HDR_DF_SHIFT 14 -#define IPV4_HDR_MF_SHIFT 13 -#define IPV4_HDR_FO_SHIFT 3 - -#define IPV4_HDR_DF_MASK (1 << IPV4_HDR_DF_SHIFT) -#define IPV4_HDR_MF_MASK (1 << IPV4_HDR_MF_SHIFT) - -#define IPV4_HDR_FO_MASK ((1 << IPV4_HDR_FO_SHIFT) - 1) - -static inline void __fill_ipv4hdr_frag(struct ipv4_hdr *dst, - const struct ipv4_hdr *src, uint16_t len, uint16_t fofs, - uint16_t dofs, uint32_t mf) -{ - rte_memcpy(dst, src, sizeof(*dst)); - fofs = (uint16_t)(fofs + (dofs >> IPV4_HDR_FO_SHIFT)); - fofs = (uint16_t)(fofs | mf << IPV4_HDR_MF_SHIFT); - dst->fragment_offset = rte_cpu_to_be_16(fofs); - dst->total_length = rte_cpu_to_be_16(len); - dst->hdr_checksum = 0; -} - -static inline void __free_fragments(struct rte_mbuf *mb[], uint32_t num) -{ - uint32_t i; - for (i = 0; i != num; i++) - rte_pktmbuf_free(mb[i]); -} - /** * IPv4 fragmentation. * @@ -113,6 +51,8 @@ static inline void __free_fragments(struct rte_mbuf *mb[], uint32_t num) * The input packet. * @param pkts_out * Array storing the output fragments. + * @param nb_pkts_out + * Number of fragments. * @param mtu_size * Size in bytes of the Maximum Transfer Unit (MTU) for the outgoing IPv4 * datagrams. This value includes the size of the IPv4 header. @@ -123,129 +63,13 @@ static inline void __free_fragments(struct rte_mbuf *mb[], uint32_t num) * @return * Upon successful completion - number of output fragments placed * in the pkts_out array. - * Otherwise - (-1) * . + * Otherwise - (-1) * errno. 
*/ -static inline int32_t rte_ipv4_fragmentation(struct rte_mbuf *pkt_in, +int32_t rte_ipv4_fragmentation(struct rte_mbuf *pkt_in, struct rte_mbuf **pkts_out, uint16_t nb_pkts_out, uint16_t mtu_size, struct rte_mempool *pool_direct, - struct rte_mempool *pool_indirect) -{ - struct rte_mbuf *in_seg = NULL; - struct ipv4_hdr *in_hdr; - uint32_t out_pkt_pos, in_seg_data_pos; - uint32_t more_in_segs; - uint16_t fragment_offset, flag_offset, frag_size; - - frag_size = (uint16_t)(mtu_size - sizeof(struct ipv4_hdr)); - - /* Fragment size should be a multiply of 8. */ - RTE_IPV4_FRAG_ASSERT((frag_size & IPV4_HDR_FO_MASK) == 0); - - /* Fragment size should be a multiply of 8. */ - RTE_IPV4_FRAG_ASSERT(IPV4_MAX_FRAGS_PER_PACKET * frag_size >= - (uint16_t)(pkt_in->pkt.pkt_len - sizeof (struct ipv4_hdr))); - - in_hdr = (struct ipv4_hdr*) pkt_in->pkt.data; - flag_offset = rte_cpu_to_be_16(in_hdr->fragment_offset); - - /* If Don't Fragment flag is set */ - if (unlikely ((flag_offset & IPV4_HDR_DF_MASK) != 0)) - return (-ENOTSUP); - - /* Check that pkts_out is big enough to hold all fragments */ - if (unlikely (frag_size * nb_pkts_out < - (uint16_t)(pkt_in->pkt.pkt_len - sizeof (struct ipv4_hdr)))) - return (-EINVAL); - - in_seg = pkt_in; - in_seg_data_pos = sizeof(struct ipv4_hdr); - out_pkt_pos = 0; - fragment_offset = 0; - - more_in_segs = 1; - while (likely(more_in_segs)) { - struct rte_mbuf *out_pkt = NULL, *out_seg_prev = NULL; - uint32_t more_out_segs; - struct ipv4_hdr *out_hdr; - - /* Allocate direct buffer */ - out_pkt = rte_pktmbuf_alloc(pool_direct); - if (unlikely(out_pkt == NULL)) { - __free_fragments(pkts_out, out_pkt_pos); - return (-ENOMEM); - } - - /* Reserve space for the IP header that will be built later */ - out_pkt->pkt.data_len = sizeof(struct ipv4_hdr); - out_pkt->pkt.pkt_len = sizeof(struct ipv4_hdr); - - out_seg_prev = out_pkt; - more_out_segs = 1; - while (likely(more_out_segs && more_in_segs)) { - struct rte_mbuf *out_seg = NULL; - uint32_t len; - - 
/* Allocate indirect buffer */ - out_seg = rte_pktmbuf_alloc(pool_indirect); - if (unlikely(out_seg == NULL)) { - rte_pktmbuf_free(out_pkt); - __free_fragments(pkts_out, out_pkt_pos); - return (-ENOMEM); - } - out_seg_prev->pkt.next = out_seg; - out_seg_prev = out_seg; - - /* Prepare indirect buffer */ - rte_pktmbuf_attach(out_seg, in_seg); - len = mtu_size - out_pkt->pkt.pkt_len; - if (len > (in_seg->pkt.data_len - in_seg_data_pos)) { - len = in_seg->pkt.data_len - in_seg_data_pos; - } - out_seg->pkt.data = (char*) in_seg->pkt.data + (uint16_t)in_seg_data_pos; - out_seg->pkt.data_len = (uint16_t)len; - out_pkt->pkt.pkt_len = (uint16_t)(len + - out_pkt->pkt.pkt_len); - out_pkt->pkt.nb_segs += 1; - in_seg_data_pos += len; - - /* Current output packet (i.e. fragment) done ? */ - if (unlikely(out_pkt->pkt.pkt_len >= mtu_size)) { - more_out_segs = 0; - } - - /* Current input segment done ? */ - if (unlikely(in_seg_data_pos == in_seg->pkt.data_len)) { - in_seg = in_seg->pkt.next; - in_seg_data_pos = 0; - - if (unlikely(in_seg == NULL)) { - more_in_segs = 0; - } - } - } - - /* Build the IP header */ - - out_hdr = (struct ipv4_hdr*) out_pkt->pkt.data; - - __fill_ipv4hdr_frag(out_hdr, in_hdr, - (uint16_t)out_pkt->pkt.pkt_len, - flag_offset, fragment_offset, more_in_segs); - - fragment_offset = (uint16_t)(fragment_offset + - out_pkt->pkt.pkt_len - sizeof(struct ipv4_hdr)); - - out_pkt->ol_flags |= PKT_TX_IP_CKSUM; - out_pkt->pkt.vlan_macip.f.l3_len = sizeof(struct ipv4_hdr); - - /* Write the fragment to the output list */ - pkts_out[out_pkt_pos] = out_pkt; - out_pkt_pos ++; - } - - return (out_pkt_pos); -} + struct rte_mempool *pool_indirect); #endif diff --git a/lib/librte_ip_frag/rte_ipv4_fragmentation.c b/lib/librte_ip_frag/rte_ipv4_fragmentation.c new file mode 100644 index 0000000000..2d33a7b664 --- /dev/null +++ b/lib/librte_ip_frag/rte_ipv4_fragmentation.c @@ -0,0 +1,239 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. 
+ * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "rte_ip_frag.h" + +/* + * MAX number of fragments per packet allowed. 
+ */ +#define IPV4_MAX_FRAGS_PER_PACKET 0x80 + +/* Debug on/off */ +#ifdef RTE_IPV4_FRAG_DEBUG + +#define RTE_IPV4_FRAG_ASSERT(exp) \ +if (!(exp)) { \ + rte_panic("function %s, line%d\tassert \"" #exp "\" failed\n", \ + __func__, __LINE__); \ +} + +#else /*RTE_IPV4_FRAG_DEBUG*/ + +#define RTE_IPV4_FRAG_ASSERT(exp) do { } while(0) + +#endif /*RTE_IPV4_FRAG_DEBUG*/ + +/* Flags and fragment offset field: bit positions and masks */ +#define IPV4_HDR_DF_SHIFT 14 +#define IPV4_HDR_MF_SHIFT 13 +#define IPV4_HDR_FO_SHIFT 3 + +#define IPV4_HDR_DF_MASK (1 << IPV4_HDR_DF_SHIFT) +#define IPV4_HDR_MF_MASK (1 << IPV4_HDR_MF_SHIFT) + +#define IPV4_HDR_FO_MASK ((1 << IPV4_HDR_FO_SHIFT) - 1) + +static inline void __fill_ipv4hdr_frag(struct ipv4_hdr *dst, + const struct ipv4_hdr *src, uint16_t len, uint16_t fofs, + uint16_t dofs, uint32_t mf) +{ + rte_memcpy(dst, src, sizeof(*dst)); + fofs = (uint16_t)(fofs + (dofs >> IPV4_HDR_FO_SHIFT)); + fofs = (uint16_t)(fofs | mf << IPV4_HDR_MF_SHIFT); + dst->fragment_offset = rte_cpu_to_be_16(fofs); + dst->total_length = rte_cpu_to_be_16(len); + dst->hdr_checksum = 0; +} + +static inline void __free_fragments(struct rte_mbuf *mb[], uint32_t num) +{ + uint32_t i; + for (i = 0; i != num; i++) + rte_pktmbuf_free(mb[i]); +} + +/** + * IPv4 fragmentation. + * + * This function implements the fragmentation of IPv4 packets. + * + * @param pkt_in + * The input packet. + * @param pkts_out + * Array storing the output fragments; nb_pkts_out is its capacity. + * @param mtu_size + * Size in bytes of the Maximum Transfer Unit (MTU) for the outgoing IPv4 + * datagrams. This value includes the size of the IPv4 header. + * @param pool_direct + * MBUF pool used for allocating direct buffers for the output fragments. + * @param pool_indirect + * MBUF pool used for allocating indirect buffers for the output fragments. + * @return + * Upon successful completion - number of output fragments placed + * in the pkts_out array. + * Otherwise - (-1) * errno (ENOTSUP, EINVAL or ENOMEM). 
+ */ +int32_t +rte_ipv4_fragmentation(struct rte_mbuf *pkt_in, + struct rte_mbuf **pkts_out, + uint16_t nb_pkts_out, + uint16_t mtu_size, + struct rte_mempool *pool_direct, + struct rte_mempool *pool_indirect) +{ + struct rte_mbuf *in_seg = NULL; + struct ipv4_hdr *in_hdr; + uint32_t out_pkt_pos, in_seg_data_pos; + uint32_t more_in_segs; + uint16_t fragment_offset, flag_offset, frag_size; + + frag_size = (uint16_t)(mtu_size - sizeof(struct ipv4_hdr)); + + /* Fragment size should be a multiple of 8. */ + RTE_IPV4_FRAG_ASSERT((frag_size & IPV4_HDR_FO_MASK) == 0); + + /* The payload must fit into IPV4_MAX_FRAGS_PER_PACKET fragments. */ + RTE_IPV4_FRAG_ASSERT(IPV4_MAX_FRAGS_PER_PACKET * frag_size >= + (uint16_t)(pkt_in->pkt.pkt_len - sizeof (struct ipv4_hdr))); + + in_hdr = (struct ipv4_hdr*) pkt_in->pkt.data; + flag_offset = rte_cpu_to_be_16(in_hdr->fragment_offset); + + /* If Don't Fragment flag is set */ + if (unlikely ((flag_offset & IPV4_HDR_DF_MASK) != 0)) + return (-ENOTSUP); + + /* Check that pkts_out is big enough to hold all fragments */ + if (unlikely (frag_size * nb_pkts_out < + (uint16_t)(pkt_in->pkt.pkt_len - sizeof (struct ipv4_hdr)))) + return (-EINVAL); + + in_seg = pkt_in; + in_seg_data_pos = sizeof(struct ipv4_hdr); + out_pkt_pos = 0; + fragment_offset = 0; + + more_in_segs = 1; + while (likely(more_in_segs)) { + struct rte_mbuf *out_pkt = NULL, *out_seg_prev = NULL; + uint32_t more_out_segs; + struct ipv4_hdr *out_hdr; + + /* Allocate direct buffer */ + out_pkt = rte_pktmbuf_alloc(pool_direct); + if (unlikely(out_pkt == NULL)) { + __free_fragments(pkts_out, out_pkt_pos); + return (-ENOMEM); + } + + /* Reserve space for the IP header that will be built later */ + out_pkt->pkt.data_len = sizeof(struct ipv4_hdr); + out_pkt->pkt.pkt_len = sizeof(struct ipv4_hdr); + + out_seg_prev = out_pkt; + more_out_segs = 1; + while (likely(more_out_segs && more_in_segs)) { + struct rte_mbuf *out_seg = NULL; + uint32_t len; + + /* Allocate indirect buffer */ + out_seg = 
rte_pktmbuf_alloc(pool_indirect); + if (unlikely(out_seg == NULL)) { + rte_pktmbuf_free(out_pkt); + __free_fragments(pkts_out, out_pkt_pos); + return (-ENOMEM); + } + out_seg_prev->pkt.next = out_seg; + out_seg_prev = out_seg; + + /* Attach the indirect buffer to the input segment and size it to the space left in this fragment */ + rte_pktmbuf_attach(out_seg, in_seg); + len = mtu_size - out_pkt->pkt.pkt_len; + if (len > (in_seg->pkt.data_len - in_seg_data_pos)) { + len = in_seg->pkt.data_len - in_seg_data_pos; + } + out_seg->pkt.data = (char*) in_seg->pkt.data + (uint16_t)in_seg_data_pos; + out_seg->pkt.data_len = (uint16_t)len; + out_pkt->pkt.pkt_len = (uint16_t)(len + + out_pkt->pkt.pkt_len); + out_pkt->pkt.nb_segs += 1; + in_seg_data_pos += len; + + /* Has the current output packet (i.e. fragment) reached mtu_size? */ + if (unlikely(out_pkt->pkt.pkt_len >= mtu_size)) { + more_out_segs = 0; + } + + /* Has the current input segment been fully consumed? */ + if (unlikely(in_seg_data_pos == in_seg->pkt.data_len)) { + in_seg = in_seg->pkt.next; + in_seg_data_pos = 0; + + if (unlikely(in_seg == NULL)) { + more_in_segs = 0; + } + } + } + + /* Build the IP header */ + + out_hdr = (struct ipv4_hdr*) out_pkt->pkt.data; + + __fill_ipv4hdr_frag(out_hdr, in_hdr, + (uint16_t)out_pkt->pkt.pkt_len, + flag_offset, fragment_offset, more_in_segs); + + fragment_offset = (uint16_t)(fragment_offset + + out_pkt->pkt.pkt_len - sizeof(struct ipv4_hdr)); + + out_pkt->ol_flags |= PKT_TX_IP_CKSUM; + out_pkt->pkt.vlan_macip.f.l3_len = sizeof(struct ipv4_hdr); + + /* Write the fragment to the output list */ + pkts_out[out_pkt_pos] = out_pkt; + out_pkt_pos ++; + } + + return (out_pkt_pos); +} diff --git a/mk/rte.app.mk b/mk/rte.app.mk index 914c568007..14a26c1e86 100644 --- a/mk/rte.app.mk +++ b/mk/rte.app.mk @@ -117,6 +117,10 @@ ifeq ($(CONFIG_RTE_LIBRTE_MBUF),y) LDLIBS += -lrte_mbuf endif +ifeq ($(CONFIG_RTE_LIBRTE_IP_FRAG),y) +LDLIBS += -lrte_ip_frag +endif + ifeq ($(CONFIG_RTE_LIBRTE_ETHER),y) LDLIBS += -lethdev endif -- 2.20.1