From: Mark Kavanagh Date: Sat, 7 Oct 2017 14:56:41 +0000 (+0800) Subject: gso: support VxLAN GSO X-Git-Tag: spdx-start~1588 X-Git-Url: http://git.droids-corp.org/?a=commitdiff_plain;h=b058d92ea95d;p=dpdk.git gso: support VxLAN GSO This patch adds a framework that allows GSO on tunneled packets. Furthermore, it leverages that framework to provide GSO support for VxLAN-encapsulated packets. Supported VxLAN packets must have an outer IPv4 header (prepended by an optional VLAN tag), and contain an inner TCP/IPv4 packet (with an optional inner VLAN tag). VxLAN GSO doesn't check if input packets have correct checksums and doesn't update checksums for output packets. Additionally, it doesn't process IP fragmented packets. As with TCP/IPv4 GSO, VxLAN GSO uses a two-segment MBUF to organize each output packet, which mandates support for multi-segment mbufs in the TX functions of the NIC driver. Also, if a packet is GSOed, VxLAN GSO reduces its MBUF refcnt by 1. As a result, when all of its GSO'd segments are freed, the packet is freed automatically. Signed-off-by: Mark Kavanagh Signed-off-by: Jiayu Hu Acked-by: Konstantin Ananyev --- diff --git a/doc/guides/rel_notes/release_17_11.rst b/doc/guides/rel_notes/release_17_11.rst index 16a4fc40d9..3134723d19 100644 --- a/doc/guides/rel_notes/release_17_11.rst +++ b/doc/guides/rel_notes/release_17_11.rst @@ -93,6 +93,8 @@ New Features ones (e.g. MTU is 1500B). Supported packet types are: * TCP/IPv4 packets. + * VxLAN packets, which must have an outer IPv4 header, and contain + an inner TCP/IPv4 packet. The GSO library doesn't check if the input packets have correct checksums, and doesn't update checksums for output packets. 
diff --git a/lib/librte_gso/Makefile b/lib/librte_gso/Makefile
index 2be64d111f..e6d41df7d8 100644
--- a/lib/librte_gso/Makefile
+++ b/lib/librte_gso/Makefile
@@ -44,6 +44,7 @@ LIBABIVER := 1
 SRCS-$(CONFIG_RTE_LIBRTE_GSO) += rte_gso.c
 SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_common.c
 SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_tcp4.c
+SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_tunnel_tcp4.c
 
 # install this header file
 SYMLINK-$(CONFIG_RTE_LIBRTE_GSO)-include += rte_gso.h
diff --git a/lib/librte_gso/gso_common.h b/lib/librte_gso/gso_common.h
index a8ad638ef2..95d54e7da0 100644
--- a/lib/librte_gso/gso_common.h
+++ b/lib/librte_gso/gso_common.h
@@ -39,6 +39,7 @@
 #include
 #include
 #include
+#include
 
 #define IS_FRAGMENTED(frag_off) (((frag_off) & IPV4_HDR_OFFSET_MASK) != 0 \
 		|| ((frag_off) & IPV4_HDR_MF_FLAG) == IPV4_HDR_MF_FLAG)
@@ -49,6 +50,30 @@
 #define IS_IPV4_TCP(flag) (((flag) & (PKT_TX_TCP_SEG | PKT_TX_IPV4)) == \
 		(PKT_TX_TCP_SEG | PKT_TX_IPV4))
 
+#define IS_IPV4_VXLAN_TCP4(flag) (((flag) & (PKT_TX_TCP_SEG | PKT_TX_IPV4 | \
+				PKT_TX_OUTER_IPV4 | PKT_TX_TUNNEL_VXLAN)) == \
+		(PKT_TX_TCP_SEG | PKT_TX_IPV4 | PKT_TX_OUTER_IPV4 | \
+		 PKT_TX_TUNNEL_VXLAN))
+
+/**
+ * Internal helper that refreshes the UDP header of a packet after
+ * segmentation by rewriting the header's datagram length field.
+ *
+ * @param pkt
+ *  The packet containing the UDP header.
+ * @param udp_offset
+ *  The offset of the UDP header from the start of the packet.
+ */
+static inline void
+update_udp_header(struct rte_mbuf *pkt, uint16_t udp_offset)
+{
+	char *udp_start = rte_pktmbuf_mtod(pkt, char *) + udp_offset;
+	struct udp_hdr *hdr = (struct udp_hdr *)udp_start;
+
+	/* The length spans from the UDP header to the end of the packet. */
+	hdr->dgram_len = rte_cpu_to_be_16(pkt->pkt_len - udp_offset);
+}
+
 /**
  * Internal function which updates the TCP header of a packet, following
  * segmentation.
This is required to update the header's 'sent' sequence
diff --git a/lib/librte_gso/gso_tunnel_tcp4.c b/lib/librte_gso/gso_tunnel_tcp4.c
new file mode 100644
index 0000000000..5e8c8e58ea
--- /dev/null
+++ b/lib/librte_gso/gso_tunnel_tcp4.c
@@ -0,0 +1,153 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <errno.h>
+
+#include "gso_common.h"
+#include "gso_tunnel_tcp4.h"
+
+/*
+ * Rewrite the outer IPv4, UDP, inner IPv4 and TCP headers of each output
+ * segment, seeding the IP ids and the TCP sequence number from the headers
+ * of the input packet 'pkt'.
+ *
+ * Per segment, the outer IP id advances by one, the inner IP id advances by
+ * 'ipid_delta', and the TCP sequence number advances by the segment's
+ * (pkt_len - data_len).
+ */
+static void
+update_tunnel_ipv4_tcp_headers(struct rte_mbuf *pkt, uint8_t ipid_delta,
+		struct rte_mbuf **segs, uint16_t nb_segs)
+{
+	struct ipv4_hdr *ipv4_hdr;
+	struct tcp_hdr *tcp_hdr;
+	uint32_t sent_seq;
+	uint16_t outer_id, inner_id, tail_idx, i;
+	uint16_t outer_ipv4_offset, inner_ipv4_offset, udp_offset, tcp_offset;
+
+	/*
+	 * l2_len is added to the UDP offset to reach the inner IPv4 header,
+	 * so it has to span everything between those two headers.
+	 */
+	outer_ipv4_offset = pkt->outer_l2_len;
+	udp_offset = outer_ipv4_offset + pkt->outer_l3_len;
+	inner_ipv4_offset = udp_offset + pkt->l2_len;
+	tcp_offset = inner_ipv4_offset + pkt->l3_len;
+
+	/* Outer IPv4 header. */
+	ipv4_hdr = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt, char *) +
+			outer_ipv4_offset);
+	outer_id = rte_be_to_cpu_16(ipv4_hdr->packet_id);
+
+	/* Inner IPv4 header. */
+	ipv4_hdr = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt, char *) +
+			inner_ipv4_offset);
+	inner_id = rte_be_to_cpu_16(ipv4_hdr->packet_id);
+
+	tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr + pkt->l3_len);
+	sent_seq = rte_be_to_cpu_32(tcp_hdr->sent_seq);
+	tail_idx = nb_segs - 1;
+
+	for (i = 0; i < nb_segs; i++) {
+		update_ipv4_header(segs[i], outer_ipv4_offset, outer_id);
+		update_udp_header(segs[i], udp_offset);
+		update_ipv4_header(segs[i], inner_ipv4_offset, inner_id);
+		/* Flag is set for every segment except the tail. */
+		update_tcp_header(segs[i], tcp_offset, sent_seq, i < tail_idx);
+		outer_id++;
+		inner_id += ipid_delta;
+		sent_seq += (segs[i]->pkt_len - segs[i]->data_len);
+	}
+}
+
+/*
+ * Segment a tunneled packet that carries inner TCP/IPv4 headers.
+ *
+ * The packet is handed back untouched in pkts_out[0] (return value 1) when
+ * its inner IPv4 header is a fragment or when there is no data behind the
+ * headers. A gso_size that leaves no room for payload behind the headers
+ * is rejected with -EINVAL.
+ */
+int
+gso_tunnel_tcp4_segment(struct rte_mbuf *pkt,
+		uint16_t gso_size,
+		uint8_t ipid_delta,
+		struct rte_mempool *direct_pool,
+		struct rte_mempool *indirect_pool,
+		struct rte_mbuf **pkts_out,
+		uint16_t nb_pkts_out)
+{
+	struct ipv4_hdr *inner_ipv4_hdr;
+	uint16_t pyld_unit_size, hdr_offset, frag_off;
+	int ret;
+
+	hdr_offset = pkt->outer_l2_len + pkt->outer_l3_len + pkt->l2_len;
+	inner_ipv4_hdr = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt, char *) +
+			hdr_offset);
+	/*
+	 * Don't process a packet whose inner IPv4 header has the MF bit
+	 * set or a non-zero fragment offset.
+	 */
+	frag_off = rte_be_to_cpu_16(inner_ipv4_hdr->fragment_offset);
+	if (unlikely(IS_FRAGMENTED(frag_off))) {
+		pkts_out[0] = pkt;
+		return 1;
+	}
+
+	hdr_offset += pkt->l3_len + pkt->l4_len;
+	/* Don't process the packet without data */
+	if (hdr_offset >= pkt->pkt_len) {
+		pkts_out[0] = pkt;
+		return 1;
+	}
+
+	/*
+	 * gso_size bounds a whole output segment, headers included, so it
+	 * must exceed hdr_offset; otherwise the uint16_t subtraction below
+	 * would wrap around or produce a zero-byte payload unit.
+	 */
+	if (unlikely(gso_size <= hdr_offset))
+		return -EINVAL;
+	pyld_unit_size = gso_size - hdr_offset;
+
+	/* Segment the payload */
+	ret = gso_do_segment(pkt, hdr_offset, pyld_unit_size, direct_pool,
+			indirect_pool, pkts_out, nb_pkts_out);
+	/* Nothing to fix up unless more than one segment was produced. */
+	if (ret <= 1)
+		return ret;
+
+	update_tunnel_ipv4_tcp_headers(pkt, ipid_delta, pkts_out, ret);
+
+	return ret;
+}
diff --git a/lib/librte_gso/gso_tunnel_tcp4.h b/lib/librte_gso/gso_tunnel_tcp4.h
new file mode 100644
index 0000000000..3c67f0cd0a
--- /dev/null
+++ b/lib/librte_gso/gso_tunnel_tcp4.h
@@ -0,0 +1,75 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _GSO_TUNNEL_TCP4_H_ +#define _GSO_TUNNEL_TCP4_H_ + +#include +#include + +/** + * Segment a tunneling packet with inner TCP/IPv4 headers. This function + * doesn't check if the input packet has correct checksums, and doesn't + * update checksums for output GSO segments. Furthermore, it doesn't + * process IP fragment packets. + * + * @param pkt + * The packet mbuf to segment. + * @param gso_size + * The max length of a GSO segment, measured in bytes. + * @param ipid_delta + * The increasing unit of IP ids. + * @param direct_pool + * MBUF pool used for allocating direct buffers for output segments. + * @param indirect_pool + * MBUF pool used for allocating indirect buffers for output segments. + * @param pkts_out + * Pointer array used to store the MBUF addresses of output GSO + * segments, when it succeeds. If the memory space in pkts_out is + * insufficient, it fails and returns -EINVAL. + * @param nb_pkts_out + * The max number of items that 'pkts_out' can keep. + * + * @return + * - The number of GSO segments filled in pkts_out on success. + * - Return -ENOMEM if run out of memory in MBUF pools. + * - Return -EINVAL for invalid parameters. 
+ */ +int gso_tunnel_tcp4_segment(struct rte_mbuf *pkt, + uint16_t gso_size, + uint8_t ipid_delta, + struct rte_mempool *direct_pool, + struct rte_mempool *indirect_pool, + struct rte_mbuf **pkts_out, + uint16_t nb_pkts_out); +#endif diff --git a/lib/librte_gso/rte_gso.c b/lib/librte_gso/rte_gso.c index 822693fa02..0a3ef11830 100644 --- a/lib/librte_gso/rte_gso.c +++ b/lib/librte_gso/rte_gso.c @@ -39,6 +39,7 @@ #include "rte_gso.h" #include "gso_common.h" #include "gso_tcp4.h" +#include "gso_tunnel_tcp4.h" int rte_gso_segment(struct rte_mbuf *pkt, @@ -56,7 +57,8 @@ rte_gso_segment(struct rte_mbuf *pkt, if (pkt == NULL || pkts_out == NULL || gso_ctx == NULL || nb_pkts_out < 1 || gso_ctx->gso_size < RTE_GSO_SEG_SIZE_MIN || - gso_ctx->gso_types != DEV_TX_OFFLOAD_TCP_TSO) + ((gso_ctx->gso_types & (DEV_TX_OFFLOAD_TCP_TSO | + DEV_TX_OFFLOAD_VXLAN_TNL_TSO)) == 0)) return -EINVAL; if (gso_ctx->gso_size >= pkt->pkt_len) { @@ -71,12 +73,20 @@ rte_gso_segment(struct rte_mbuf *pkt, ipid_delta = (gso_ctx->flag != RTE_GSO_FLAG_IPID_FIXED); ol_flags = pkt->ol_flags; - if (IS_IPV4_TCP(pkt->ol_flags)) { + if (IS_IPV4_VXLAN_TCP4(pkt->ol_flags) + && (gso_ctx->gso_types & DEV_TX_OFFLOAD_VXLAN_TNL_TSO)) { + pkt->ol_flags &= (~PKT_TX_TCP_SEG); + ret = gso_tunnel_tcp4_segment(pkt, gso_size, ipid_delta, + direct_pool, indirect_pool, + pkts_out, nb_pkts_out); + } else if (IS_IPV4_TCP(pkt->ol_flags) && + (gso_ctx->gso_types & DEV_TX_OFFLOAD_TCP_TSO)) { pkt->ol_flags &= (~PKT_TX_TCP_SEG); ret = gso_tcp4_segment(pkt, gso_size, ipid_delta, direct_pool, indirect_pool, pkts_out, nb_pkts_out); } else { + /* unsupported packet, skip */ pkts_out[0] = pkt; RTE_LOG(DEBUG, GSO, "Unsupported packet type\n"); return 1;