gso: support TCP/IPv4 GSO
author Jiayu Hu <jiayu.hu@intel.com>
Sat, 7 Oct 2017 14:56:40 +0000 (22:56 +0800)
committer Ferruh Yigit <ferruh.yigit@intel.com>
Thu, 12 Oct 2017 00:36:57 +0000 (01:36 +0100)
This patch adds GSO support for TCP/IPv4 packets. Supported packets
may include a single VLAN tag. TCP/IPv4 GSO doesn't check if input
packets have correct checksums, and doesn't update checksums for
output packets (the responsibility for this lies with the application).
Additionally, TCP/IPv4 GSO doesn't process IP fragmented packets.

TCP/IPv4 GSO uses two chained MBUFs, one direct MBUF and one indirect
MBUF, to organize an output packet. Note that we refer to these two
chained MBUFs as a two-segment MBUF. The direct MBUF stores the packet
header, while the indirect mbuf simply points to a location within the
original packet's payload. Consequently, use of the GSO library requires
multi-segment MBUF support in the TX functions of the NIC driver.

If a packet is GSO'd, TCP/IPv4 GSO reduces its MBUF refcnt by 1. As a
result, when all of its GSOed segments are freed, the packet is freed
automatically.

Signed-off-by: Jiayu Hu <jiayu.hu@intel.com>
Signed-off-by: Mark Kavanagh <mark.b.kavanagh@intel.com>
Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
Tested-by: Lei Yao <lei.a.yao@intel.com>
doc/guides/rel_notes/release_17_11.rst
lib/Makefile
lib/librte_eal/common/include/rte_log.h
lib/librte_gso/Makefile
lib/librte_gso/gso_common.c [new file with mode: 0644]
lib/librte_gso/gso_common.h [new file with mode: 0644]
lib/librte_gso/gso_tcp4.c [new file with mode: 0644]
lib/librte_gso/gso_tcp4.h [new file with mode: 0644]
lib/librte_gso/rte_gso.c
lib/librte_gso/rte_gso.h

index ddc4fef..16a4fc4 100644 (file)
@@ -86,6 +86,18 @@ New Features
   See the :ref:`Membership Library <Member_Library>` documentation in
   the Programmers Guide document, for more information.
 
+* **Added the Generic Segmentation Offload Library.**
+
+  Added the Generic Segmentation Offload (GSO) library to enable
+  applications to split large packets (e.g. MTU is 64KB) into small
+  ones (e.g. MTU is 1500B). Supported packet types are:
+
+  * TCP/IPv4 packets.
+
+  The GSO library doesn't check if the input packets have correct
+  checksums, and doesn't update checksums for output packets.
+  Additionally, the GSO library doesn't process IP fragmented packets.
+
 
 Resolved Issues
 ---------------
index 04a37b4..86d475f 100644 (file)
@@ -110,7 +110,7 @@ DEPDIRS-librte_reorder := librte_eal librte_mempool librte_mbuf
 DIRS-$(CONFIG_RTE_LIBRTE_PDUMP) += librte_pdump
 DEPDIRS-librte_pdump := librte_eal librte_mempool librte_mbuf librte_ether
 DIRS-$(CONFIG_RTE_LIBRTE_GSO) += librte_gso
-DEPDIRS-librte_gso := librte_eal librte_mbuf librte_ether librte_net
+DEPDIRS-librte_gso := librte_eal librte_mbuf librte_ether librte_net librte_mempool
 
 ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
 DIRS-$(CONFIG_RTE_LIBRTE_KNI) += librte_kni
index ec8dba7..2fa1199 100644 (file)
@@ -87,6 +87,7 @@ extern struct rte_logs rte_logs;
 #define RTE_LOGTYPE_CRYPTODEV 17 /**< Log related to cryptodev. */
 #define RTE_LOGTYPE_EFD       18 /**< Log related to EFD. */
 #define RTE_LOGTYPE_EVENTDEV  19 /**< Log related to eventdev. */
+#define RTE_LOGTYPE_GSO       20 /**< Log related to GSO. */
 
 /* these log types can be used in an application */
 #define RTE_LOGTYPE_USER1     24 /**< User-defined log type 1. */
index aeaacbc..2be64d1 100644 (file)
@@ -42,6 +42,8 @@ LIBABIVER := 1
 
 #source files
 SRCS-$(CONFIG_RTE_LIBRTE_GSO) += rte_gso.c
+SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_common.c
+SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_tcp4.c
 
 # install this header file
 SYMLINK-$(CONFIG_RTE_LIBRTE_GSO)-include += rte_gso.h
diff --git a/lib/librte_gso/gso_common.c b/lib/librte_gso/gso_common.c
new file mode 100644 (file)
index 0000000..ee75d4c
--- /dev/null
@@ -0,0 +1,153 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdbool.h>
+#include <errno.h>
+
+#include <rte_memcpy.h>
+#include <rte_mempool.h>
+
+#include "gso_common.h"
+
+/*
+ * Set up the direct mbuf that heads an output GSO segment: inherit the
+ * metadata fields listed below from pkt and duplicate the first
+ * pkt_hdr_offset bytes of pkt's data into hdr_segment's data area.
+ */
+static inline void
+hdr_segment_init(struct rte_mbuf *hdr_segment, struct rte_mbuf *pkt,
+               uint16_t pkt_hdr_offset)
+{
+       const char *src_hdr = rte_pktmbuf_mtod(pkt, char *);
+       char *dst_hdr = rte_pktmbuf_mtod(hdr_segment, char *);
+
+       /* Initially the header mbuf holds nothing but the packet header */
+       hdr_segment->data_len = pkt_hdr_offset;
+       hdr_segment->pkt_len = pkt_hdr_offset;
+       hdr_segment->nb_segs = 1;
+
+       /* Metadata taken over from the input packet */
+       hdr_segment->tx_offload = pkt->tx_offload;
+       hdr_segment->packet_type = pkt->packet_type;
+       hdr_segment->ol_flags = pkt->ol_flags;
+       hdr_segment->port = pkt->port;
+
+       /* Duplicate the header bytes */
+       rte_memcpy(dst_hdr, src_hdr, pkt_hdr_offset);
+}
+
+/* Release the first nb_pkts mbufs stored in pkts. */
+static inline void
+free_gso_segment(struct rte_mbuf **pkts, uint16_t nb_pkts)
+{
+       uint16_t idx = 0;
+
+       while (idx < nb_pkts) {
+               rte_pktmbuf_free(pkts[idx]);
+               idx++;
+       }
+}
+
+/*
+ * Divide the payload of pkt (everything after the first pkt_hdr_offset
+ * bytes) into output segments carrying at most pyld_unit_size payload
+ * bytes each. Every output segment is a chain made of one direct mbuf
+ * from direct_pool, holding a copy of the header, followed by one
+ * indirect mbuf from indirect_pool per input mbuf segment it takes
+ * payload from.
+ *
+ * Returns the number of segments stored in pkts_out, -ENOMEM if either
+ * pool runs out of mbufs, or -EINVAL if pkts_out is too small, if
+ * pyld_unit_size is 0, or if the header does not fit in the first mbuf
+ * segment of pkt. On failure every segment created so far is freed.
+ */
+int
+gso_do_segment(struct rte_mbuf *pkt,
+               uint16_t pkt_hdr_offset,
+               uint16_t pyld_unit_size,
+               struct rte_mempool *direct_pool,
+               struct rte_mempool *indirect_pool,
+               struct rte_mbuf **pkts_out,
+               uint16_t nb_pkts_out)
+{
+       struct rte_mbuf *pkt_in;
+       struct rte_mbuf *hdr_segment, *pyld_segment, *prev_segment;
+       uint16_t pkt_in_data_pos, segment_bytes_remaining;
+       uint16_t pyld_len, nb_segs;
+       bool more_in_pkt, more_out_segs;
+
+       /*
+        * With a zero payload unit no output segment could carry data, so
+        * the loop below would only exhaust pkts_out before failing.
+        */
+       if (unlikely(pyld_unit_size == 0))
+               return -EINVAL;
+
+       /*
+        * The header is copied from the first mbuf segment only, and the
+        * payload position starts inside it; a header reaching past that
+        * segment would be over-read and would wrap the 16-bit payload
+        * length computed below.
+        */
+       if (unlikely(pkt_hdr_offset > pkt->data_len))
+               return -EINVAL;
+
+       pkt_in = pkt;
+       nb_segs = 0;
+       more_in_pkt = true;
+       pkt_in_data_pos = pkt_hdr_offset;
+
+       /* One iteration per output GSO segment */
+       while (more_in_pkt) {
+               if (unlikely(nb_segs >= nb_pkts_out)) {
+                       free_gso_segment(pkts_out, nb_segs);
+                       return -EINVAL;
+               }
+
+               /* Allocate a direct MBUF */
+               hdr_segment = rte_pktmbuf_alloc(direct_pool);
+               if (unlikely(hdr_segment == NULL)) {
+                       free_gso_segment(pkts_out, nb_segs);
+                       return -ENOMEM;
+               }
+               /* Fill the packet header */
+               hdr_segment_init(hdr_segment, pkt, pkt_hdr_offset);
+
+               prev_segment = hdr_segment;
+               segment_bytes_remaining = pyld_unit_size;
+               more_out_segs = true;
+
+               /* One iteration per input mbuf segment feeding this output */
+               while (more_out_segs && more_in_pkt) {
+                       /* Allocate an indirect MBUF */
+                       pyld_segment = rte_pktmbuf_alloc(indirect_pool);
+                       if (unlikely(pyld_segment == NULL)) {
+                               /*
+                                * hdr_segment is not in pkts_out yet, so
+                                * its chain is released separately.
+                                */
+                               rte_pktmbuf_free(hdr_segment);
+                               free_gso_segment(pkts_out, nb_segs);
+                               return -ENOMEM;
+                       }
+                       /* Attach to current MBUF segment of pkt */
+                       rte_pktmbuf_attach(pyld_segment, pkt_in);
+
+                       prev_segment->next = pyld_segment;
+                       prev_segment = pyld_segment;
+
+                       /* Take what is left of the unit, capped by pkt_in */
+                       pyld_len = segment_bytes_remaining;
+                       if (pyld_len + pkt_in_data_pos > pkt_in->data_len)
+                               pyld_len = pkt_in->data_len - pkt_in_data_pos;
+
+                       pyld_segment->data_off = pkt_in_data_pos +
+                               pkt_in->data_off;
+                       pyld_segment->data_len = pyld_len;
+
+                       /* Update header segment */
+                       hdr_segment->pkt_len += pyld_len;
+                       hdr_segment->nb_segs++;
+
+                       pkt_in_data_pos += pyld_len;
+                       segment_bytes_remaining -= pyld_len;
+
+                       /* Finish processing a MBUF segment of pkt */
+                       if (pkt_in_data_pos == pkt_in->data_len) {
+                               pkt_in = pkt_in->next;
+                               pkt_in_data_pos = 0;
+                               if (pkt_in == NULL)
+                                       more_in_pkt = false;
+                       }
+
+                       /* Finish generating a GSO segment */
+                       if (segment_bytes_remaining == 0)
+                               more_out_segs = false;
+               }
+               pkts_out[nb_segs++] = hdr_segment;
+       }
+       return nb_segs;
+}
diff --git a/lib/librte_gso/gso_common.h b/lib/librte_gso/gso_common.h
new file mode 100644 (file)
index 0000000..a8ad638
--- /dev/null
@@ -0,0 +1,141 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _GSO_COMMON_H_
+#define _GSO_COMMON_H_
+
+#include <stdint.h>
+
+#include <rte_mbuf.h>
+#include <rte_ip.h>
+#include <rte_tcp.h>
+
+#define IS_FRAGMENTED(frag_off) (((frag_off) & IPV4_HDR_OFFSET_MASK) != 0 \
+               || ((frag_off) & IPV4_HDR_MF_FLAG) == IPV4_HDR_MF_FLAG)
+
+#define TCP_HDR_PSH_MASK ((uint8_t)0x08)
+#define TCP_HDR_FIN_MASK ((uint8_t)0x01)
+
+#define IS_IPV4_TCP(flag) (((flag) & (PKT_TX_TCP_SEG | PKT_TX_IPV4)) == \
+               (PKT_TX_TCP_SEG | PKT_TX_IPV4))
+
+/**
+ * Internal function which rewrites the TCP header of one output segment
+ * after segmentation: it stores the segment's own 'sent' sequence number
+ * and, for every segment except the tail one, clears the 'PSH' and 'FIN'
+ * flags.
+ *
+ * @param pkt
+ *  The packet containing the TCP header.
+ * @param l4_offset
+ *  The offset of the TCP header from the start of the packet.
+ * @param sent_seq
+ *  The sent sequence number, in CPU byte order.
+ * @param non_tail
+ *  Non-zero if this is NOT the tail segment (PSH and FIN are then
+ *  cleared); zero for the tail segment, whose flags are left untouched.
+ */
+static inline void
+update_tcp_header(struct rte_mbuf *pkt, uint16_t l4_offset, uint32_t sent_seq,
+               uint8_t non_tail)
+{
+       char *l4_start = rte_pktmbuf_mtod(pkt, char *) + l4_offset;
+       struct tcp_hdr *tcp_hdr = (struct tcp_hdr *)l4_start;
+       const uint8_t tail_only_flags = TCP_HDR_PSH_MASK | TCP_HDR_FIN_MASK;
+
+       tcp_hdr->sent_seq = rte_cpu_to_be_32(sent_seq);
+       if (likely(non_tail))
+               tcp_hdr->tcp_flags &= (~tail_only_flags);
+}
+
+/**
+ * Internal function which rewrites the IPv4 header of one output segment
+ * after segmentation. The 'total_length' field is set to the segment's
+ * packet length minus the L3 offset, and the 'packet_id' field is set to
+ * the ID given by the caller.
+ *
+ * @param pkt
+ *  The packet containing the IPv4 header.
+ * @param l3_offset
+ *  The offset of the IPv4 header from the start of the packet.
+ * @param id
+ *  The new ID of the packet, in CPU byte order.
+ */
+static inline void
+update_ipv4_header(struct rte_mbuf *pkt, uint16_t l3_offset, uint16_t id)
+{
+       char *l3_start = rte_pktmbuf_mtod(pkt, char *) + l3_offset;
+       struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)l3_start;
+       uint16_t l3_total_len = pkt->pkt_len - l3_offset;
+
+       ipv4_hdr->packet_id = rte_cpu_to_be_16(id);
+       ipv4_hdr->total_length = rte_cpu_to_be_16(l3_total_len);
+}
+
+/**
+ * Internal function which divides the input packet into small segments.
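+ * Each of the newly-created segments is organized as a chained MBUF,
+ * where the first segment is a standard (direct) mbuf, which stores a
+ * copy of the packet header, and each following segment is an indirect
+ * mbuf which points to a section of data in the input packet. A
+ * single-segment input packet yields two-segment MBUFs; when the payload
+ * of an output segment spans several mbuf segments of the input packet,
+ * one indirect mbuf is chained per input segment.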
+ *
+ * @param pkt
+ *  Packet to segment.
+ * @param pkt_hdr_offset
+ *  Packet header offset, measured in bytes.
+ * @param pyld_unit_size
+ *  The max payload length of a GSO segment.
+ * @param direct_pool
+ *  MBUF pool used for allocating direct buffers for output segments.
+ * @param indirect_pool
+ *  MBUF pool used for allocating indirect buffers for output segments.
+ * @param pkts_out
+ *  Pointer array used to keep the mbuf addresses of output segments. If
+ *  the memory space in pkts_out is insufficient, gso_do_segment() fails
+ *  and returns -EINVAL.
+ * @param nb_pkts_out
+ *  The max number of items that pkts_out can keep.
+ *
+ * @return
+ *  - The number of segments created in the event of success.
+ *  - Return -ENOMEM if run out of memory in MBUF pools.
+ *  - Return -EINVAL for invalid parameters.
+ */
+int gso_do_segment(struct rte_mbuf *pkt,
+               uint16_t pkt_hdr_offset,
+               uint16_t pyld_unit_size,
+               struct rte_mempool *direct_pool,
+               struct rte_mempool *indirect_pool,
+               struct rte_mbuf **pkts_out,
+               uint16_t nb_pkts_out);
+#endif
diff --git a/lib/librte_gso/gso_tcp4.c b/lib/librte_gso/gso_tcp4.c
new file mode 100644 (file)
index 0000000..0c628cb
--- /dev/null
@@ -0,0 +1,102 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "gso_common.h"
+#include "gso_tcp4.h"
+
+/*
+ * Fix up the IPv4 and TCP headers of the nb_segs output segments in segs.
+ * The starting IP ID and TCP sequence number are read from the headers of
+ * the input packet pkt; after each segment the ID advances by ipid_delta
+ * and the sequence number by that segment's payload length. Only the last
+ * segment keeps its PSH/FIN flags.
+ */
+static void
+update_ipv4_tcp_headers(struct rte_mbuf *pkt, uint8_t ipid_delta,
+               struct rte_mbuf **segs, uint16_t nb_segs)
+{
+       const uint16_t l3_offset = pkt->l2_len;
+       const uint16_t l4_offset = l3_offset + pkt->l3_len;
+       const uint16_t tail_idx = nb_segs - 1;
+       char *pkt_data = rte_pktmbuf_mtod(pkt, char*);
+       struct ipv4_hdr *ipv4_hdr =
+               (struct ipv4_hdr *)(pkt_data + l3_offset);
+       struct tcp_hdr *tcp_hdr =
+               (struct tcp_hdr *)((char *)ipv4_hdr + pkt->l3_len);
+       uint32_t sent_seq = rte_be_to_cpu_32(tcp_hdr->sent_seq);
+       uint16_t id = rte_be_to_cpu_16(ipv4_hdr->packet_id);
+       uint16_t i;
+
+       for (i = 0; i < nb_segs; i++) {
+               struct rte_mbuf *seg = segs[i];
+
+               update_ipv4_header(seg, l3_offset, id);
+               update_tcp_header(seg, l4_offset, sent_seq, i < tail_idx);
+
+               /* data_len of the head mbuf is the header size */
+               sent_seq += (seg->pkt_len - seg->data_len);
+               id += ipid_delta;
+       }
+}
+
+/*
+ * Segment a TCP/IPv4 packet into GSO segments of at most gso_size bytes
+ * (headers included) and update the IPv4/TCP headers of the outputs.
+ *
+ * IP-fragmented packets and packets without payload are not segmented:
+ * pkt itself is stored in pkts_out[0] and 1 is returned.
+ *
+ * Returns the number of entries filled in pkts_out, -ENOMEM if an mbuf
+ * pool is exhausted, or -EINVAL if pkts_out is too small or if gso_size
+ * leaves no room for payload after the packet's headers. pkts_out is not
+ * a valid result when a negative value is returned.
+ */
+int
+gso_tcp4_segment(struct rte_mbuf *pkt,
+               uint16_t gso_size,
+               uint8_t ipid_delta,
+               struct rte_mempool *direct_pool,
+               struct rte_mempool *indirect_pool,
+               struct rte_mbuf **pkts_out,
+               uint16_t nb_pkts_out)
+{
+       struct ipv4_hdr *ipv4_hdr;
+       uint16_t pyld_unit_size, hdr_offset;
+       uint16_t frag_off;
+       int ret;
+
+       /* Don't process the fragmented packet */
+       ipv4_hdr = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt, char *) +
+                       pkt->l2_len);
+       frag_off = rte_be_to_cpu_16(ipv4_hdr->fragment_offset);
+       if (unlikely(IS_FRAGMENTED(frag_off))) {
+               pkts_out[0] = pkt;
+               return 1;
+       }
+
+       /* Don't process the packet without data */
+       hdr_offset = pkt->l2_len + pkt->l3_len + pkt->l4_len;
+       if (unlikely(hdr_offset >= pkt->pkt_len)) {
+               pkts_out[0] = pkt;
+               return 1;
+       }
+
+       /*
+        * The headers of this packet (e.g. with a VLAN tag or IP/TCP
+        * options) may be as large as gso_size. The subtraction below would
+        * then yield zero or wrap around in 16 bits, so reject the request.
+        */
+       if (unlikely(hdr_offset >= gso_size))
+               return -EINVAL;
+
+       pyld_unit_size = gso_size - hdr_offset;
+
+       /* Segment the payload */
+       ret = gso_do_segment(pkt, hdr_offset, pyld_unit_size, direct_pool,
+                       indirect_pool, pkts_out, nb_pkts_out);
+       /* Headers are only rewritten when real segmentation took place */
+       if (ret > 1)
+               update_ipv4_tcp_headers(pkt, ipid_delta, pkts_out, ret);
+
+       return ret;
+}
diff --git a/lib/librte_gso/gso_tcp4.h b/lib/librte_gso/gso_tcp4.h
new file mode 100644 (file)
index 0000000..1c57441
--- /dev/null
@@ -0,0 +1,74 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _GSO_TCP4_H_
+#define _GSO_TCP4_H_
+
+#include <stdint.h>
+#include <rte_mbuf.h>
+
+/**
+ * Segment an IPv4/TCP packet. This function doesn't check if the input
+ * packet has correct checksums, and doesn't update checksums for output
+ * GSO segments. Furthermore, it doesn't process IP fragment packets.
+ *
+ * @param pkt
+ *  The packet mbuf to segment.
+ * @param gso_size
+ *  The max length of a GSO segment, measured in bytes.
+ * @param ipid_delta
+ *  The value added to the IP ID for each successive output GSO segment.
+ * @param direct_pool
+ *  MBUF pool used for allocating direct buffers for output segments.
+ * @param indirect_pool
+ *  MBUF pool used for allocating indirect buffers for output segments.
+ * @param pkts_out
+ *  Pointer array used to store the MBUF addresses of output GSO
+ *  segments, when the function succeeds. If the memory space in
+ *  pkts_out is insufficient, it fails and returns -EINVAL.
+ * @param nb_pkts_out
+ *  The max number of items that 'pkts_out' can keep.
+ *
+ * @return
+ *   - The number of GSO segments filled in pkts_out on success.
+ *   - Return -ENOMEM if run out of memory in MBUF pools.
+ *   - Return -EINVAL for invalid parameters.
+ */
+int gso_tcp4_segment(struct rte_mbuf *pkt,
+               uint16_t gso_size,
+               uint8_t ipid_delta,
+               struct rte_mempool *direct_pool,
+               struct rte_mempool *indirect_pool,
+               struct rte_mbuf **pkts_out,
+               uint16_t nb_pkts_out);
+#endif
index b773636..822693f 100644 (file)
 
 #include <errno.h>
 
+#include <rte_log.h>
+#include <rte_ethdev.h>
+
 #include "rte_gso.h"
+#include "gso_common.h"
+#include "gso_tcp4.h"
 
 int
 rte_gso_segment(struct rte_mbuf *pkt,
@@ -41,12 +46,52 @@ rte_gso_segment(struct rte_mbuf *pkt,
                struct rte_mbuf **pkts_out,
                uint16_t nb_pkts_out)
 {
+       struct rte_mempool *direct_pool, *indirect_pool;
+       struct rte_mbuf *pkt_seg;
+       uint64_t ol_flags;
+       uint16_t gso_size;
+       uint8_t ipid_delta;
+       int ret = 1;
+
        if (pkt == NULL || pkts_out == NULL || gso_ctx == NULL ||
-                       nb_pkts_out < 1)
+                       nb_pkts_out < 1 ||
+                       gso_ctx->gso_size < RTE_GSO_SEG_SIZE_MIN ||
+                       gso_ctx->gso_types != DEV_TX_OFFLOAD_TCP_TSO)
                return -EINVAL;
 
-       pkt->ol_flags &= (~PKT_TX_TCP_SEG);
-       pkts_out[0] = pkt;
+       if (gso_ctx->gso_size >= pkt->pkt_len) {
+               pkt->ol_flags &= (~PKT_TX_TCP_SEG);
+               pkts_out[0] = pkt;
+               return 1;
+       }
+
+       direct_pool = gso_ctx->direct_pool;
+       indirect_pool = gso_ctx->indirect_pool;
+       gso_size = gso_ctx->gso_size;
+       ipid_delta = (gso_ctx->flag != RTE_GSO_FLAG_IPID_FIXED);
+       ol_flags = pkt->ol_flags;
+
+       if (IS_IPV4_TCP(pkt->ol_flags)) {
+               pkt->ol_flags &= (~PKT_TX_TCP_SEG);
+               ret = gso_tcp4_segment(pkt, gso_size, ipid_delta,
+                               direct_pool, indirect_pool,
+                               pkts_out, nb_pkts_out);
+       } else {
+               pkts_out[0] = pkt;
+               RTE_LOG(DEBUG, GSO, "Unsupported packet type\n");
+               return 1;
+       }
+
+       if (ret > 1) {
+               pkt_seg = pkt;
+               while (pkt_seg) {
+                       rte_mbuf_refcnt_update(pkt_seg, -1);
+                       pkt_seg = pkt_seg->next;
+               }
+       } else if (ret < 0) {
+               /* Revert the ol_flags in the event of failure. */
+               pkt->ol_flags = ol_flags;
+       }
 
-       return 1;
+       return ret;
 }
index 9d3b4fc..4b77176 100644 (file)
@@ -46,6 +46,10 @@ extern "C" {
 #include <stdint.h>
 #include <rte_mbuf.h>
 
+/* Minimum GSO segment size. */
+#define RTE_GSO_SEG_SIZE_MIN (sizeof(struct ether_hdr) + \
+               sizeof(struct ipv4_hdr) + sizeof(struct tcp_hdr) + 1)
+
 /* GSO flags for rte_gso_ctx. */
 #define RTE_GSO_FLAG_IPID_FIXED (1ULL << 0)
 /**< Use fixed IP ids for output GSO segments. Setting
@@ -81,7 +85,8 @@ struct rte_gso_ctx {
         */
        uint16_t gso_size;
        /**< maximum size of an output GSO segment, including packet
-        * header and payload, measured in bytes.
+        * header and payload, measured in bytes. Must be at least
+        * RTE_GSO_SEG_SIZE_MIN.
         */
 };