net/sfc: support firmware-assisted TSO
authorIvan Malov <ivan.malov@oktetlabs.ru>
Thu, 15 Dec 2016 12:51:23 +0000 (12:51 +0000)
committerFerruh Yigit <ferruh.yigit@intel.com>
Tue, 17 Jan 2017 18:40:51 +0000 (19:40 +0100)
Signed-off-by: Ivan Malov <ivan.malov@oktetlabs.ru>
Signed-off-by: Andrew Rybchenko <arybchenko@solarflare.com>
Reviewed-by: Andrew Lee <alee@solarflare.com>
Reviewed-by: Mark Spender <mspender@solarflare.com>
Reviewed-by: Robert Stonehouse <rstonehouse@solarflare.com>
config/common_base
doc/guides/nics/features/sfc_efx.ini
doc/guides/nics/sfc_efx.rst
drivers/net/sfc/Makefile
drivers/net/sfc/sfc.c
drivers/net/sfc/sfc.h
drivers/net/sfc/sfc_ethdev.c
drivers/net/sfc/sfc_tso.c [new file with mode: 0644]
drivers/net/sfc/sfc_tx.c
drivers/net/sfc/sfc_tx.h

index 87d1719..ae42d17 100644 (file)
@@ -261,6 +261,7 @@ CONFIG_RTE_LIBRTE_BNXT_PMD=y
 #
 CONFIG_RTE_LIBRTE_SFC_EFX_PMD=y
 CONFIG_RTE_LIBRTE_SFC_EFX_DEBUG=n
+CONFIG_RTE_LIBRTE_SFC_EFX_TSO=n
 
 #
 # Compile software PMD backed by SZEDATA2 device
index 07c58d5..3a15baa 100644 (file)
@@ -11,6 +11,7 @@ Queue start/stop     = Y
 MTU update           = Y
 Jumbo frame          = Y
 Scattered Rx         = Y
+TSO                  = Y
 Promiscuous mode     = Y
 Allmulticast mode    = Y
 Multicast MAC filter = Y
index bc45b17..6be4fba 100644 (file)
@@ -63,6 +63,8 @@ SFC EFX PMD has support for:
 
 - Allmulticast mode
 
+- TCP segmentation offload (TSO)
+
 - Multicast MAC filter
 
 - IPv4/IPv6 TCP/UDP receive checksum offload
@@ -169,6 +171,12 @@ Please note that enabling debugging options may affect system performance.
 
   Enable compilation of the extra run-time consistency checks.
 
+- ``CONFIG_RTE_LIBRTE_SFC_EFX_TSO`` (default **n**)
+
+  Toggle TCP segmentation offload support.
+  Enabling the feature limits the number of available transmit queues
+  significantly due to the limited number of adapter TSO contexts.
+
 
 Per-Device Parameters
 ~~~~~~~~~~~~~~~~~~~~~
index dd099b2..14d6536 100644 (file)
@@ -90,6 +90,8 @@ SRCS-$(CONFIG_RTE_LIBRTE_SFC_EFX_PMD) += sfc_port.c
 SRCS-$(CONFIG_RTE_LIBRTE_SFC_EFX_PMD) += sfc_rx.c
 SRCS-$(CONFIG_RTE_LIBRTE_SFC_EFX_PMD) += sfc_tx.c
 
+SRCS-$(CONFIG_RTE_LIBRTE_SFC_EFX_TSO) += sfc_tso.c
+
 VPATH += $(SRCDIR)/base
 
 SRCS-$(CONFIG_RTE_LIBRTE_SFC_EFX_PMD) += efx_bootcfg.c
@@ -139,4 +141,6 @@ DEPDIRS-$(CONFIG_RTE_LIBRTE_SFC_EFX_PMD) += lib/librte_ether
 DEPDIRS-$(CONFIG_RTE_LIBRTE_SFC_EFX_PMD) += lib/librte_mempool
 DEPDIRS-$(CONFIG_RTE_LIBRTE_SFC_EFX_PMD) += lib/librte_mbuf
 
+DEPDIRS-$(CONFIG_RTE_LIBRTE_SFC_EFX_TSO) += lib/librte_net
+
 include $(RTE_SDK)/mk/rte.lib.mk
index 22426fa..648ad8c 100644 (file)
@@ -621,6 +621,14 @@ sfc_attach(struct sfc_adapter *sa)
        if (rc != 0)
                goto fail_set_rss_defaults;
 
+#ifdef RTE_LIBRTE_SFC_EFX_TSO
+       sa->tso = efx_nic_cfg_get(sa->nic)->enc_fw_assisted_tso_v2_enabled;
+       if (!sa->tso)
+               sfc_warn(sa, "TSO support isn't available on this adapter");
+#else /* !RTE_LIBRTE_SFC_EFX_TSO */
+       sa->tso = B_FALSE;
+#endif /* RTE_LIBRTE_SFC_EFX_TSO */
+
        sfc_log_init(sa, "fini nic");
        efx_nic_fini(enp);
 
index 3344519..8c6c02f 100644 (file)
@@ -198,6 +198,8 @@ struct sfc_adapter {
        unsigned int                    txq_count;
        struct sfc_txq_info             *txq_info;
 
+       boolean_t                       tso;
+
        unsigned int                    rss_channels;
 
 #if EFSYS_OPT_RX_SCALE
index 93fff42..2520032 100644 (file)
@@ -92,6 +92,9 @@ sfc_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
        }
 #endif
 
+       if (sa->tso)
+               dev_info->tx_offload_capa |= DEV_TX_OFFLOAD_TCP_TSO;
+
        dev_info->rx_desc_lim.nb_max = EFX_RXQ_MAXNDESCS;
        dev_info->rx_desc_lim.nb_min = EFX_RXQ_MINNDESCS;
        /* The RXQ hardware requires that the descriptor count is a power
diff --git a/drivers/net/sfc/sfc_tso.c b/drivers/net/sfc/sfc_tso.c
new file mode 100644 (file)
index 0000000..68d84c9
--- /dev/null
@@ -0,0 +1,200 @@
+/*-
+ * Copyright (c) 2016 Solarflare Communications Inc.
+ * All rights reserved.
+ *
+ * This software was jointly developed between OKTET Labs (under contract
+ * for Solarflare) and Solarflare Communications, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_ip.h>
+#include <rte_tcp.h>
+
+#include "sfc.h"
+#include "sfc_debug.h"
+#include "sfc_tx.h"
+#include "sfc_ev.h"
+
+/** Standard TSO header length */
+#define SFC_TSOH_STD_LEN        256
+
+/** The number of TSO option descriptors that precede the packet descriptors */
+#define SFC_TSO_OPDESCS_IDX_SHIFT      2
+
+int
+sfc_tso_alloc_tsoh_objs(struct sfc_tx_sw_desc *sw_ring,
+                       unsigned int txq_entries, unsigned int socket_id)
+{
+       unsigned int i;
+
+       for (i = 0; i < txq_entries; ++i) {
+               sw_ring[i].tsoh = rte_malloc_socket("sfc-txq-tsoh-obj",
+                                                   SFC_TSOH_STD_LEN,
+                                                   SFC_TX_SEG_BOUNDARY,
+                                                   socket_id);
+               if (sw_ring[i].tsoh == NULL)
+                       goto fail_alloc_tsoh_objs;
+       }
+
+       return 0;
+
+fail_alloc_tsoh_objs:
+       while (i > 0)
+               rte_free(sw_ring[--i].tsoh);
+
+       return ENOMEM;
+}
+
+void
+sfc_tso_free_tsoh_objs(struct sfc_tx_sw_desc *sw_ring, unsigned int txq_entries)
+{
+       unsigned int i;
+
+       for (i = 0; i < txq_entries; ++i) {
+               rte_free(sw_ring[i].tsoh);
+               sw_ring[i].tsoh = NULL;
+       }
+}
+
+static void
+sfc_tso_prepare_header(struct sfc_txq *txq, struct rte_mbuf **in_seg,
+                      size_t *in_off, unsigned int idx, size_t bytes_left)
+{
+       struct rte_mbuf *m = *in_seg;
+       size_t bytes_to_copy = 0;
+       uint8_t *tsoh = txq->sw_ring[idx & txq->ptr_mask].tsoh;
+
+       do {
+               bytes_to_copy = MIN(bytes_left, m->data_len);
+
+               rte_memcpy(tsoh, rte_pktmbuf_mtod(m, uint8_t *),
+                          bytes_to_copy);
+
+               bytes_left -= bytes_to_copy;
+               tsoh += bytes_to_copy;
+
+               if (bytes_left > 0) {
+                       m = m->next;
+                       SFC_ASSERT(m != NULL);
+               }
+       } while (bytes_left > 0);
+
+       if (bytes_to_copy == m->data_len) {
+               *in_seg = m->next;
+               *in_off = 0;
+       } else {
+               *in_seg = m;
+               *in_off = bytes_to_copy;
+       }
+}
+
+int
+sfc_tso_do(struct sfc_txq *txq, unsigned int idx, struct rte_mbuf **in_seg,
+          size_t *in_off, efx_desc_t **pend, unsigned int *pkt_descs,
+          size_t *pkt_len)
+{
+       uint8_t *tsoh;
+       const struct tcp_hdr *th;
+       efsys_dma_addr_t header_paddr;
+       efsys_dma_addr_t paddr_next_frag;
+       uint16_t packet_id;
+       uint32_t sent_seq;
+       struct rte_mbuf *m = *in_seg;
+       size_t nh_off = m->l2_len; /* IP header offset */
+       size_t tcph_off = m->l2_len + m->l3_len; /* TCP header offset */
+       size_t header_len = m->l2_len + m->l3_len + m->l4_len;
+       const efx_nic_cfg_t *encp = efx_nic_cfg_get(txq->evq->sa->nic);
+
+       idx += SFC_TSO_OPDESCS_IDX_SHIFT;
+
+       /* Packets which have too big headers should be discarded */
+       if (unlikely(header_len > SFC_TSOH_STD_LEN))
+               return EMSGSIZE;
+
+       /*
+        * The TCP header must start at most 208 bytes into the frame.
+        * If it starts later than this then the NIC won't realise
+        * it's a TCP packet and TSO edits won't be applied
+        */
+       if (unlikely(tcph_off > encp->enc_tx_tso_tcp_header_offset_limit))
+               return EMSGSIZE;
+
+       header_paddr = rte_pktmbuf_mtophys(m);
+       paddr_next_frag = P2ROUNDUP(header_paddr + 1, SFC_TX_SEG_BOUNDARY);
+
+       /*
+        * Sometimes headers may be split across multiple mbufs. In such cases
+        * we need to glue those pieces and store them in some temporary place.
+        * Also, packet headers must be contiguous in memory, so that
+        * they can be referred to with a single DMA descriptor. Hence, handle
+        * the case where the original header crosses a 4K memory boundary
+        */
+       if ((m->data_len < header_len) ||
+           ((paddr_next_frag - header_paddr) < header_len)) {
+               sfc_tso_prepare_header(txq, in_seg, in_off, idx, header_len);
+               tsoh = txq->sw_ring[idx & txq->ptr_mask].tsoh;
+
+               header_paddr = rte_malloc_virt2phy((void *)tsoh);
+       } else {
+               if (m->data_len == header_len) {
+                       *in_off = 0;
+                       *in_seg = m->next;
+               } else {
+                       *in_off = header_len;
+               }
+
+               tsoh = rte_pktmbuf_mtod(m, uint8_t *);
+       }
+
+       /* Handle IP header */
+       if (m->ol_flags & PKT_TX_IPV4) {
+               const struct ipv4_hdr *iphe4;
+
+               iphe4 = (const struct ipv4_hdr *)(tsoh + nh_off);
+               rte_memcpy(&packet_id, &iphe4->packet_id, sizeof(uint16_t));
+               packet_id = rte_be_to_cpu_16(packet_id);
+       } else if (m->ol_flags & PKT_TX_IPV6) {
+               packet_id = 0;
+       } else {
+               return EINVAL;
+       }
+
+       /* Handle TCP header */
+       th = (const struct tcp_hdr *)(tsoh + tcph_off);
+
+       rte_memcpy(&sent_seq, &th->sent_seq, sizeof(uint32_t));
+       sent_seq = rte_be_to_cpu_32(sent_seq);
+
+       efx_tx_qdesc_tso2_create(txq->common, packet_id, sent_seq, m->tso_segsz,
+                                *pend, EFX_TX_FATSOV2_OPT_NDESCS);
+
+       *pend += EFX_TX_FATSOV2_OPT_NDESCS;
+       *pkt_descs += EFX_TX_FATSOV2_OPT_NDESCS;
+
+       efx_tx_qdesc_dma_create(txq->common, header_paddr, header_len,
+                               B_FALSE, (*pend)++);
+       (*pkt_descs)++;
+       *pkt_len -= header_len;
+
+       return 0;
+}
index 86bcfec..3e64c0f 100644 (file)
@@ -184,6 +184,13 @@ sfc_tx_qinit(struct sfc_adapter *sa, unsigned int sw_index,
        if (txq->sw_ring == NULL)
                goto fail_desc_alloc;
 
+       if (sa->tso) {
+               rc = sfc_tso_alloc_tsoh_objs(txq->sw_ring, txq_info->entries,
+                                            socket_id);
+               if (rc != 0)
+                       goto fail_alloc_tsoh_objs;
+       }
+
        txq->state = SFC_TXQ_INITIALIZED;
        txq->ptr_mask = txq_info->entries - 1;
        txq->free_thresh = (tx_conf->tx_free_thresh) ? tx_conf->tx_free_thresh :
@@ -199,6 +206,9 @@ sfc_tx_qinit(struct sfc_adapter *sa, unsigned int sw_index,
 
        return 0;
 
+fail_alloc_tsoh_objs:
+       rte_free(txq->sw_ring);
+
 fail_desc_alloc:
        rte_free(txq->pend_desc);
 
@@ -234,6 +244,8 @@ sfc_tx_qfini(struct sfc_adapter *sa, unsigned int sw_index)
        SFC_ASSERT(txq != NULL);
        SFC_ASSERT(txq->state == SFC_TXQ_INITIALIZED);
 
+       sfc_tso_free_tsoh_objs(txq->sw_ring, txq_info->entries);
+
        txq_info->txq = NULL;
        txq_info->entries = 0;
 
@@ -300,6 +312,11 @@ sfc_tx_init(struct sfc_adapter *sa)
 
        sa->txq_count = sa->eth_dev->data->nb_tx_queues;
 
+       if (sa->tso)
+               sa->txq_count = MIN(sa->txq_count,
+                  efx_nic_cfg_get(sa->nic)->enc_fw_assisted_tso_v2_n_contexts /
+                  efx_nic_cfg_get(sa->nic)->enc_hw_pf_count);
+
        sa->txq_info = rte_calloc_socket("sfc-txqs", sa->txq_count,
                                         sizeof(sa->txq_info[0]), 0,
                                         sa->socket_id);
@@ -373,17 +390,25 @@ sfc_tx_qstart(struct sfc_adapter *sa, unsigned int sw_index)
         * hence, we always enable it here
         */
        if ((txq->flags & ETH_TXQ_FLAGS_NOXSUMTCP) ||
-           (txq->flags & ETH_TXQ_FLAGS_NOXSUMUDP))
+           (txq->flags & ETH_TXQ_FLAGS_NOXSUMUDP)) {
                flags = EFX_TXQ_CKSUM_IPV4;
-       else
+       } else {
                flags = EFX_TXQ_CKSUM_IPV4 | EFX_TXQ_CKSUM_TCPUDP;
 
+               if (sa->tso)
+                       flags |= EFX_TXQ_FATSOV2;
+       }
+
        rc = efx_tx_qcreate(sa->nic, sw_index, 0, &txq->mem,
                            txq_info->entries, 0 /* not used on EF10 */,
                            flags, evq->common,
                            &txq->common, &desc_index);
-       if (rc != 0)
+       if (rc != 0) {
+               if (sa->tso && (rc == ENOSPC))
+                       sfc_err(sa, "ran out of TSO contexts");
+
                goto fail_tx_qcreate;
+       }
 
        txq->added = txq->pending = txq->completed = desc_index;
        txq->hw_vlan_tci = 0;
@@ -494,6 +519,13 @@ sfc_tx_start(struct sfc_adapter *sa)
 
        sfc_log_init(sa, "txq_count = %u", sa->txq_count);
 
+       if (sa->tso) {
+               if (!efx_nic_cfg_get(sa->nic)->enc_fw_assisted_tso_v2_enabled) {
+                       sfc_warn(sa, "TSO support was unable to be restored");
+                       sa->tso = B_FALSE;
+               }
+       }
+
        rc = efx_tx_init(sa->nic);
        if (rc != 0)
                goto fail_efx_tx_init;
@@ -607,6 +639,7 @@ sfc_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
                struct rte_mbuf         *m_seg = *pktp;
                size_t                  pkt_len = m_seg->pkt_len;
                unsigned int            pkt_descs = 0;
+               size_t                  in_off = 0;
 
                /*
                 * Here VLAN TCI is expected to be zero in case if no
@@ -617,6 +650,46 @@ sfc_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
                 */
                pkt_descs += sfc_tx_maybe_insert_tag(txq, m_seg, &pend);
 
+#ifdef RTE_LIBRTE_SFC_EFX_TSO
+               if (m_seg->ol_flags & PKT_TX_TCP_SEG) {
+                       /*
+                        * We expect the caller to have set correct
+                        * 'pkt->l[2, 3, 4]_len' values
+                        */
+                       if (sfc_tso_do(txq, added, &m_seg, &in_off, &pend,
+                                      &pkt_descs, &pkt_len) != 0) {
+                               /* We may have reached this place for
+                                * one of the following reasons:
+                                *
+                                * 1) Packet header length is greater
+                                *    than SFC_TSOH_STD_LEN
+                                * 2) TCP header starts more than
+                                *    208 bytes into the frame
+                                *
+                                * We will deceive RTE saying that we have sent
+                                * the packet, but we will actually drop it.
+                                * Hence, we should revert 'pend' to the
+                                * previous state (in case we have added
+                                * VLAN descriptor) and start processing
+                                * the next packet. The original mbuf,
+                                * however, shouldn't be orphaned
+                                */
+                               pend -= pkt_descs;
+
+                               rte_pktmbuf_free(*pktp);
+
+                               continue;
+                       }
+
+                       /*
+                        * We've only added 2 FATSOv2 option descriptors
+                        * and 1 descriptor for the linearized packet header.
+                        * The outstanding work will be done in the same manner
+                        * as for the usual non-TSO path
+                        */
+               }
+#endif /* RTE_LIBRTE_SFC_EFX_TSO */
+
                for (; m_seg != NULL; m_seg = m_seg->next) {
                        efsys_dma_addr_t        next_frag;
                        size_t                  seg_len;
@@ -624,6 +697,16 @@ sfc_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
                        seg_len = m_seg->data_len;
                        next_frag = rte_mbuf_data_dma_addr(m_seg);
 
+                       /*
+                        * If we've started a TSO transaction a few steps earlier,
+                        * we'll skip packet header using an offset in the
+                        * current segment (which has been set to the
+                        * first one containing payload)
+                        */
+                       seg_len -= in_off;
+                       next_frag += in_off;
+                       in_off = 0;
+
                        do {
                                efsys_dma_addr_t        frag_addr = next_frag;
                                size_t                  frag_len;
index 4d25c6a..581e2aa 100644 (file)
@@ -50,6 +50,9 @@ struct sfc_evq;
 
 struct sfc_tx_sw_desc {
        struct rte_mbuf         *mbuf;
+#ifdef RTE_LIBRTE_SFC_EFX_TSO
+       uint8_t                 *tsoh;  /* Buffer to store TSO header */
+#endif /* RTE_LIBRTE_SFC_EFX_TSO */
 };
 
 enum sfc_txq_state_bit {
@@ -113,6 +116,31 @@ void sfc_tx_stop(struct sfc_adapter *sa);
 uint16_t sfc_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
                       uint16_t nb_pkts);
 
+#ifdef RTE_LIBRTE_SFC_EFX_TSO
+/* From 'sfc_tso.c' */
+int sfc_tso_alloc_tsoh_objs(struct sfc_tx_sw_desc *sw_ring,
+                           unsigned int txq_entries, unsigned int socket_id);
+void sfc_tso_free_tsoh_objs(struct sfc_tx_sw_desc *sw_ring,
+                           unsigned int txq_entries);
+int sfc_tso_do(struct sfc_txq *txq, unsigned int idx, struct rte_mbuf **in_seg,
+              size_t *in_off, efx_desc_t **pend, unsigned int *pkt_descs,
+              size_t *pkt_len);
+#else /* !RTE_LIBRTE_SFC_EFX_TSO */
+static inline int
+sfc_tso_alloc_tsoh_objs(__rte_unused struct sfc_tx_sw_desc *sw_ring,
+                       __rte_unused unsigned int txq_entries,
+                       __rte_unused unsigned int socket_id)
+{
+       return 0;
+}
+
+static inline void
+sfc_tso_free_tsoh_objs(__rte_unused struct sfc_tx_sw_desc *sw_ring,
+                      __rte_unused unsigned int txq_entries)
+{
+}
+#endif /* RTE_LIBRTE_SFC_EFX_TSO */
+
 #ifdef __cplusplus
 }
 #endif