From 31987388cef2a48faddf36935ec391d292e373ea Mon Sep 17 00:00:00 2001 From: Cristian Dumitrescu Date: Wed, 4 Jun 2014 19:08:23 +0100 Subject: [PATCH] port: IPv4 reassembly The IPv4 reassembly operation is presented as a Packet Framework port. The code duplication with the examples/ip_reassembly sample application is to be addressed soon by linking the relevant library once upstreamed. Signed-off-by: Cristian Dumitrescu Acked-by: Pablo de Lara Guarch Acked-by: Ivan Boule [Thomas: update to new ip_frag library] Signed-off-by: Thomas Monjalon --- doc/doxy-api-index.md | 1 + lib/librte_port/Makefile | 2 + lib/librte_port/rte_port_ras.c | 252 +++++++++++++++++++++++++++++++++ lib/librte_port/rte_port_ras.h | 83 +++++++++++ 4 files changed, 338 insertions(+) create mode 100644 lib/librte_port/rte_port_ras.c create mode 100644 lib/librte_port/rte_port_ras.h diff --git a/doc/doxy-api-index.md b/doc/doxy-api-index.md index 4b7c2a2380..d967d0e121 100644 --- a/doc/doxy-api-index.md +++ b/doc/doxy-api-index.md @@ -105,6 +105,7 @@ There are many libraries, so their headers may be grouped by topics: [ethdev] (@ref rte_port_ethdev.h), [ring] (@ref rte_port_ring.h), [frag] (@ref rte_port_frag.h), + [reass] (@ref rte_port_ras.h), - **basic**: [approx fraction] (@ref rte_approx.h), diff --git a/lib/librte_port/Makefile b/lib/librte_port/Makefile index d31bed3d5c..aee7d88f65 100644 --- a/lib/librte_port/Makefile +++ b/lib/librte_port/Makefile @@ -48,6 +48,7 @@ ifeq ($(CONFIG_RTE_LIBRTE_IP_FRAG),y) ifeq ($(CONFIG_RTE_MBUF_SCATTER_GATHER),y) SRCS-$(CONFIG_RTE_LIBRTE_PORT) += rte_port_frag.c endif +SRCS-$(CONFIG_RTE_LIBRTE_PORT) += rte_port_ras.c endif # install includes @@ -58,6 +59,7 @@ ifeq ($(CONFIG_RTE_LIBRTE_IP_FRAG),y) ifeq ($(CONFIG_RTE_MBUF_SCATTER_GATHER),y) SYMLINK-$(CONFIG_RTE_LIBRTE_PORT)-include += rte_port_frag.h endif +SYMLINK-$(CONFIG_RTE_LIBRTE_PORT)-include += rte_port_ras.h endif # this lib depends upon: diff --git a/lib/librte_port/rte_port_ras.c 
b/lib/librte_port/rte_port_ras.c new file mode 100644 index 0000000000..b1ac297361 --- /dev/null +++ b/lib/librte_port/rte_port_ras.c @@ -0,0 +1,252 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#include + +#include +#include +#include +#include + +#include "rte_port_ras.h" +/* Fragment table geometry handed to rte_ip_frag_table_create(); each value can be overridden at build time */ +#ifndef IPV4_RAS_N_BUCKETS +#define IPV4_RAS_N_BUCKETS 4094 +#endif + +#ifndef IPV4_RAS_N_ENTRIES_PER_BUCKET +#define IPV4_RAS_N_ENTRIES_PER_BUCKET 8 +#endif + +#ifndef IPV4_RAS_N_ENTRIES +#define IPV4_RAS_N_ENTRIES (IPV4_RAS_N_BUCKETS * IPV4_RAS_N_ENTRIES_PER_BUCKET) +#endif +/* Port state: destination ring, packets buffered for the next enqueue, and the IPv4 reassembly table with its death row */ +struct rte_port_ring_writer_ipv4_ras { + struct rte_mbuf *tx_buf[RTE_PORT_IN_BURST_SIZE_MAX]; /* packets waiting to be enqueued by send_burst() */ + struct rte_ring *ring; + uint32_t tx_burst_sz; /* buffer fill level that triggers send_burst() */ + uint32_t tx_buf_count; /* number of valid entries in tx_buf */ + struct rte_ip_frag_tbl *frag_tbl; + struct rte_ip_frag_death_row death_row; +}; +/* f_create: validate params, allocate the port on socket_id and create its fragment table; returns the port, or NULL on any failure */ +static void * +rte_port_ring_writer_ipv4_ras_create(void *params, int socket_id) +{ + struct rte_port_ring_writer_ipv4_ras_params *conf = + (struct rte_port_ring_writer_ipv4_ras_params *) params; + struct rte_port_ring_writer_ipv4_ras *port; + uint64_t frag_cycles; + + /* Check input parameters */ + if (conf == NULL) { + RTE_LOG(ERR, PORT, "%s: Parameter conf is NULL\n", __func__); + return NULL; + } + if (conf->ring == NULL) { + RTE_LOG(ERR, PORT, "%s: Parameter ring is NULL\n", __func__); + return NULL; + } + if ((conf->tx_burst_sz == 0) || + (conf->tx_burst_sz > RTE_PORT_IN_BURST_SIZE_MAX)) { + RTE_LOG(ERR, PORT, "%s: Parameter tx_burst_sz is invalid\n", + __func__); + return NULL; + } + + /* Memory allocation */ + port = rte_zmalloc_socket("PORT", sizeof(*port), + CACHE_LINE_SIZE, socket_id); + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Failed to allocate socket\n", __func__); + return NULL; + } + + /* Create fragmentation table; frag_cycles is the TSC frequency rounded up to a multiple of MS_PER_S, then multiplied by 100 (NOTE(review): presumably a 100 s fragment time-out expressed in TSC cycles - confirm) */ + frag_cycles = (rte_get_tsc_hz() + MS_PER_S - 1) / MS_PER_S * MS_PER_S; + frag_cycles *= 100; + + port->frag_tbl = rte_ip_frag_table_create( + IPV4_RAS_N_BUCKETS, + IPV4_RAS_N_ENTRIES_PER_BUCKET, + IPV4_RAS_N_ENTRIES, + frag_cycles, + socket_id); + + if (port->frag_tbl == NULL) { + RTE_LOG(ERR, PORT, "%s: rte_ip_frag_table_create failed\n", + __func__); + rte_free(port); + return NULL; + } + + /* Initialization */ + 
port->ring = conf->ring; + port->tx_burst_sz = conf->tx_burst_sz; + port->tx_buf_count = 0; + + return port; +} +/* Enqueue the buffered packets to the ring, free those beyond the count the enqueue returned, and empty the buffer */ +static inline void +send_burst(struct rte_port_ring_writer_ipv4_ras *p) +{ + uint32_t nb_tx; + + nb_tx = rte_ring_sp_enqueue_burst(p->ring, (void **)p->tx_buf, + p->tx_buf_count); + + for ( ; nb_tx < p->tx_buf_count; nb_tx++) + rte_pktmbuf_free(p->tx_buf[nb_tx]); + + p->tx_buf_count = 0; +} +/* Buffer an unfragmented packet as-is, or hand a fragment to the reassembly table and buffer the packet it returns, if any */ +static inline void +process_one(struct rte_port_ring_writer_ipv4_ras *p, struct rte_mbuf *pkt) +{ + /* Assume there is no ethernet header */ + struct ipv4_hdr *pkt_hdr = (struct ipv4_hdr *) + (rte_pktmbuf_mtod(pkt, unsigned char *)); + + /* Get "More fragments" (MF) flag and fragment offset */ + uint16_t frag_field = rte_be_to_cpu_16(pkt_hdr->fragment_offset); + uint16_t frag_offset = (uint16_t)(frag_field & IPV4_HDR_OFFSET_MASK); + uint16_t frag_flag = (uint16_t)(frag_field & IPV4_HDR_MF_FLAG); + + /* Not a fragment (MF clear and offset 0): pass through unchanged; otherwise try to reassemble */ + if ((frag_flag == 0) && (frag_offset == 0)) + p->tx_buf[p->tx_buf_count++] = pkt; + else { + struct rte_mbuf *mo; + struct rte_ip_frag_tbl *tbl = p->frag_tbl; + struct rte_ip_frag_death_row *dr = &p->death_row; + + /* Process this fragment; a non-NULL result is buffered for sending */ + mo = rte_ipv4_frag_reassemble_packet(tbl, dr, pkt, rte_rdtsc(), pkt_hdr); + if (mo != NULL) + p->tx_buf[p->tx_buf_count++] = mo; + + rte_ip_frag_free_death_row(&p->death_row, 3); + } +} +/* f_tx: process one packet and send the buffer once it holds tx_burst_sz packets; always returns 0 */ +static int +rte_port_ring_writer_ipv4_ras_tx(void *port, struct rte_mbuf *pkt) +{ + struct rte_port_ring_writer_ipv4_ras *p = + (struct rte_port_ring_writer_ipv4_ras *) port; + + process_one(p, pkt); + if (p->tx_buf_count >= p->tx_burst_sz) + send_burst(p); + + return 0; +} +/* f_tx_bulk: process every packet selected by pkts_mask (bit i set selects pkts[i]); always returns 0 */ +static int +rte_port_ring_writer_ipv4_ras_tx_bulk(void *port, + struct rte_mbuf **pkts, + uint64_t pkts_mask) +{ + struct rte_port_ring_writer_ipv4_ras *p = + (struct rte_port_ring_writer_ipv4_ras *) port; + /* Fast path: the mask is a contiguous run of set bits starting at bit 0, so pkts[0..n_pkts-1] are all selected */ + if ((pkts_mask & (pkts_mask + 1)) == 0) { + uint64_t n_pkts = __builtin_popcountll(pkts_mask); + uint32_t 
i; + + for (i = 0; i < n_pkts; i++) { + struct rte_mbuf *pkt = pkts[i]; + + process_one(p, pkt); + if (p->tx_buf_count >= p->tx_burst_sz) + send_burst(p); + } + } else { /* sparse mask: visit the set bits from the lowest upward, clearing each once handled */ + for ( ; pkts_mask; ) { + uint32_t pkt_index = __builtin_ctzll(pkts_mask); + uint64_t pkt_mask = 1LLU << pkt_index; + struct rte_mbuf *pkt = pkts[pkt_index]; + + process_one(p, pkt); + if (p->tx_buf_count >= p->tx_burst_sz) + send_burst(p); + + pkts_mask &= ~pkt_mask; + } + } + + return 0; +} +/* f_flush: send whatever is currently buffered, if anything; always returns 0 */ +static int +rte_port_ring_writer_ipv4_ras_flush(void *port) +{ + struct rte_port_ring_writer_ipv4_ras *p = + (struct rte_port_ring_writer_ipv4_ras *) port; + + if (p->tx_buf_count > 0) + send_burst(p); + + return 0; +} +/* f_free: flush buffered packets, destroy the fragment table and release the port; returns -1 if port is NULL, 0 otherwise */ +static int +rte_port_ring_writer_ipv4_ras_free(void *port) +{ + struct rte_port_ring_writer_ipv4_ras *p = + (struct rte_port_ring_writer_ipv4_ras *) port; + + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Parameter port is NULL\n", __func__); + return -1; + } + + rte_port_ring_writer_ipv4_ras_flush(port); + rte_ip_frag_table_destroy(p->frag_tbl); + rte_free(port); + + return 0; +} + +/* + * Summary of port operations: the output-port callbacks exported by this file + */ +struct rte_port_out_ops rte_port_ring_writer_ipv4_ras_ops = { + .f_create = rte_port_ring_writer_ipv4_ras_create, + .f_free = rte_port_ring_writer_ipv4_ras_free, + .f_tx = rte_port_ring_writer_ipv4_ras_tx, + .f_tx_bulk = rte_port_ring_writer_ipv4_ras_tx_bulk, + .f_flush = rte_port_ring_writer_ipv4_ras_flush, +}; diff --git a/lib/librte_port/rte_port_ras.h b/lib/librte_port/rte_port_ras.h new file mode 100644 index 0000000000..c6ed688c51 --- /dev/null +++ b/lib/librte_port/rte_port_ras.h @@ -0,0 +1,83 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_RTE_PORT_RAS_H__ +#define __INCLUDE_RTE_PORT_RAS_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @file + * RTE Port for IPv4 Reassembly + * + * This port is built on top of a pre-initialized single producer rte_ring. In + * order to minimize the number of packets stored in the ring at any given + * time, the IP reassembly functionality is executed on the ring write operation, + * hence this port is implemented as an output port. 
A regular ring_reader port + * can be created to read from the same ring. + * + * The packets written to the ring are either complete IP datagrams or IP + * fragments. The packets read from the ring are all complete IP datagrams, + * either jumbo frames (i.e. IP packets with length bigger than MTU) or not. + * The complete IP datagrams written to the ring are not changed. The IP + * fragments written to the ring are first reassembled into complete IP + * datagrams, or dropped on error or IP reassembly time-out. + * + ***/ + +#include + +#include + +#include "rte_port.h" + +/** ring_writer_ipv4_ras port parameters */ +struct rte_port_ring_writer_ipv4_ras_params { + /** Underlying single producer ring that has to be pre-initialized. */ + struct rte_ring *ring; + + /** Recommended burst size to ring. The actual burst size can be bigger + or smaller than this value. */ + uint32_t tx_burst_sz; +}; + +/** ring_writer_ipv4_ras port operations */ +extern struct rte_port_out_ops rte_port_ring_writer_ipv4_ras_ops; + +#ifdef __cplusplus +} +#endif + +#endif -- 2.20.1