From: Ivan Malov Date: Thu, 15 Dec 2016 12:51:23 +0000 (+0000) Subject: net/sfc: support firmware-assisted TSO X-Git-Tag: spdx-start~4969 X-Git-Url: http://git.droids-corp.org/?a=commitdiff_plain;h=fec33d5bb3eb;p=dpdk.git net/sfc: support firmware-assisted TSO Signed-off-by: Ivan Malov Signed-off-by: Andrew Rybchenko Reviewed-by: Andrew Lee Reviewed-by: Mark Spender Reviewed-by: Robert Stonehouse --- diff --git a/config/common_base b/config/common_base index 87d171936a..ae42d171c7 100644 --- a/config/common_base +++ b/config/common_base @@ -261,6 +261,7 @@ CONFIG_RTE_LIBRTE_BNXT_PMD=y # CONFIG_RTE_LIBRTE_SFC_EFX_PMD=y CONFIG_RTE_LIBRTE_SFC_EFX_DEBUG=n +CONFIG_RTE_LIBRTE_SFC_EFX_TSO=n # # Compile software PMD backed by SZEDATA2 device diff --git a/doc/guides/nics/features/sfc_efx.ini b/doc/guides/nics/features/sfc_efx.ini index 07c58d5959..3a15baa5ae 100644 --- a/doc/guides/nics/features/sfc_efx.ini +++ b/doc/guides/nics/features/sfc_efx.ini @@ -11,6 +11,7 @@ Queue start/stop = Y MTU update = Y Jumbo frame = Y Scattered Rx = Y +TSO = Y Promiscuous mode = Y Allmulticast mode = Y Multicast MAC filter = Y diff --git a/doc/guides/nics/sfc_efx.rst b/doc/guides/nics/sfc_efx.rst index bc45b178af..6be4fba97e 100644 --- a/doc/guides/nics/sfc_efx.rst +++ b/doc/guides/nics/sfc_efx.rst @@ -63,6 +63,8 @@ SFC EFX PMD has support for: - Allmulticast mode +- TCP segmentation offload (TSO) + - Multicast MAC filter - IPv4/IPv6 TCP/UDP receive checksum offload @@ -169,6 +171,12 @@ Please note that enabling debugging options may affect system performance. Enable compilation of the extra run-time consistency checks. +- ``CONFIG_RTE_LIBRTE_SFC_EFX_TSO`` (default **n**) + + Toggle TCP segmentation offload support. + Enabling the feature limits the number of available transmit queues + significantly due to the limited number of adapter TSO contexts. 
+ Per-Device Parameters ~~~~~~~~~~~~~~~~~~~~~ diff --git a/drivers/net/sfc/Makefile b/drivers/net/sfc/Makefile index dd099b20c3..14d6536a7b 100644 --- a/drivers/net/sfc/Makefile +++ b/drivers/net/sfc/Makefile @@ -90,6 +90,8 @@ SRCS-$(CONFIG_RTE_LIBRTE_SFC_EFX_PMD) += sfc_port.c SRCS-$(CONFIG_RTE_LIBRTE_SFC_EFX_PMD) += sfc_rx.c SRCS-$(CONFIG_RTE_LIBRTE_SFC_EFX_PMD) += sfc_tx.c +SRCS-$(CONFIG_RTE_LIBRTE_SFC_EFX_TSO) += sfc_tso.c + VPATH += $(SRCDIR)/base SRCS-$(CONFIG_RTE_LIBRTE_SFC_EFX_PMD) += efx_bootcfg.c @@ -139,4 +141,6 @@ DEPDIRS-$(CONFIG_RTE_LIBRTE_SFC_EFX_PMD) += lib/librte_ether DEPDIRS-$(CONFIG_RTE_LIBRTE_SFC_EFX_PMD) += lib/librte_mempool DEPDIRS-$(CONFIG_RTE_LIBRTE_SFC_EFX_PMD) += lib/librte_mbuf +DEPDIRS-$(CONFIG_RTE_LIBRTE_SFC_EFX_TSO) += lib/librte_net + include $(RTE_SDK)/mk/rte.lib.mk diff --git a/drivers/net/sfc/sfc.c b/drivers/net/sfc/sfc.c index 22426fae73..648ad8c588 100644 --- a/drivers/net/sfc/sfc.c +++ b/drivers/net/sfc/sfc.c @@ -621,6 +621,14 @@ sfc_attach(struct sfc_adapter *sa) if (rc != 0) goto fail_set_rss_defaults; +#ifdef RTE_LIBRTE_SFC_EFX_TSO + sa->tso = efx_nic_cfg_get(sa->nic)->enc_fw_assisted_tso_v2_enabled; + if (!sa->tso) + sfc_warn(sa, "TSO support isn't available on this adapter"); +#else /* !RTE_LIBRTE_SFC_EFX_TSO */ + sa->tso = B_FALSE; +#endif /* RTE_LIBRTE_SFC_EFX_TSO */ + sfc_log_init(sa, "fini nic"); efx_nic_fini(enp); diff --git a/drivers/net/sfc/sfc.h b/drivers/net/sfc/sfc.h index 33445196bc..8c6c02fe49 100644 --- a/drivers/net/sfc/sfc.h +++ b/drivers/net/sfc/sfc.h @@ -198,6 +198,8 @@ struct sfc_adapter { unsigned int txq_count; struct sfc_txq_info *txq_info; + boolean_t tso; + unsigned int rss_channels; #if EFSYS_OPT_RX_SCALE diff --git a/drivers/net/sfc/sfc_ethdev.c b/drivers/net/sfc/sfc_ethdev.c index 93fff421e6..25200328cc 100644 --- a/drivers/net/sfc/sfc_ethdev.c +++ b/drivers/net/sfc/sfc_ethdev.c @@ -92,6 +92,9 @@ sfc_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) } #endif + if (sa->tso) 
+ dev_info->tx_offload_capa |= DEV_TX_OFFLOAD_TCP_TSO; + dev_info->rx_desc_lim.nb_max = EFX_RXQ_MAXNDESCS; dev_info->rx_desc_lim.nb_min = EFX_RXQ_MINNDESCS; /* The RXQ hardware requires that the descriptor count is a power diff --git a/drivers/net/sfc/sfc_tso.c b/drivers/net/sfc/sfc_tso.c new file mode 100644 index 0000000000..68d84c90bf --- /dev/null +++ b/drivers/net/sfc/sfc_tso.c @@ -0,0 +1,200 @@ +/*- + * Copyright (c) 2016 Solarflare Communications Inc. + * All rights reserved. + * + * This software was jointly developed between OKTET Labs (under contract + * for Solarflare) and Solarflare Communications, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include + +#include "sfc.h" +#include "sfc_debug.h" +#include "sfc_tx.h" +#include "sfc_ev.h" + +/** Standard TSO header length */ +#define SFC_TSOH_STD_LEN 256 + +/** The number of TSO option descriptors that precede the packet descriptors */ +#define SFC_TSO_OPDESCS_IDX_SHIFT 2 + +int +sfc_tso_alloc_tsoh_objs(struct sfc_tx_sw_desc *sw_ring, + unsigned int txq_entries, unsigned int socket_id) +{ + unsigned int i; + + for (i = 0; i < txq_entries; ++i) { + sw_ring[i].tsoh = rte_malloc_socket("sfc-txq-tsoh-obj", + SFC_TSOH_STD_LEN, + SFC_TX_SEG_BOUNDARY, + socket_id); + if (sw_ring[i].tsoh == NULL) + goto fail_alloc_tsoh_objs; + } + + return 0; + +fail_alloc_tsoh_objs: + while (i > 0) + rte_free(sw_ring[--i].tsoh); + + return ENOMEM; +} + +void +sfc_tso_free_tsoh_objs(struct sfc_tx_sw_desc *sw_ring, unsigned int txq_entries) +{ + unsigned int i; + + for (i = 0; i < txq_entries; ++i) { + rte_free(sw_ring[i].tsoh); + sw_ring[i].tsoh = NULL; + } +} + +static void +sfc_tso_prepare_header(struct sfc_txq *txq, struct rte_mbuf **in_seg, + size_t *in_off, unsigned int idx, size_t bytes_left) +{ + struct rte_mbuf *m = *in_seg; + size_t bytes_to_copy = 0; + uint8_t *tsoh = txq->sw_ring[idx & txq->ptr_mask].tsoh; + + do { + bytes_to_copy = MIN(bytes_left, m->data_len); + + rte_memcpy(tsoh, rte_pktmbuf_mtod(m, uint8_t *), + bytes_to_copy); + + bytes_left -= bytes_to_copy; + tsoh += bytes_to_copy; + + if (bytes_left > 0) { + m = m->next; + SFC_ASSERT(m != NULL); + } + } while (bytes_left > 0); + + if (bytes_to_copy == m->data_len) { + *in_seg = m->next; + *in_off = 0; + } else { + *in_seg = m; + *in_off = bytes_to_copy; + } +} + +int +sfc_tso_do(struct sfc_txq *txq, unsigned int idx, struct rte_mbuf **in_seg, + size_t *in_off, efx_desc_t **pend, unsigned int *pkt_descs, + size_t *pkt_len) +{ + uint8_t *tsoh; + const struct tcp_hdr *th; + efsys_dma_addr_t header_paddr; + efsys_dma_addr_t paddr_next_frag; + uint16_t packet_id; + uint32_t sent_seq; + struct 
rte_mbuf *m = *in_seg; + size_t nh_off = m->l2_len; /* IP header offset */ + size_t tcph_off = m->l2_len + m->l3_len; /* TCP header offset */ + size_t header_len = m->l2_len + m->l3_len + m->l4_len; + const efx_nic_cfg_t *encp = efx_nic_cfg_get(txq->evq->sa->nic); + + idx += SFC_TSO_OPDESCS_IDX_SHIFT; + + /* Packets which have too big headers should be discarded */ + if (unlikely(header_len > SFC_TSOH_STD_LEN)) + return EMSGSIZE; + + /* + * The TCP header must start at most 208 bytes into the frame. + * If it starts later than this then the NIC won't realise + * it's a TCP packet and TSO edits won't be applied + */ + if (unlikely(tcph_off > encp->enc_tx_tso_tcp_header_offset_limit)) + return EMSGSIZE; + + header_paddr = rte_pktmbuf_mtophys(m); + paddr_next_frag = P2ROUNDUP(header_paddr + 1, SFC_TX_SEG_BOUNDARY); + + /* + * Sometimes headers may be split across multiple mbufs. In such cases + * we need to glue those pieces and store them in some temporary place. + * Also, packet headers must be contiguous in memory, so that + * they can be referred to with a single DMA descriptor. 
Hence, handle + * the case where the original header crosses a 4K memory boundary + */ + if ((m->data_len < header_len) || + ((paddr_next_frag - header_paddr) < header_len)) { + sfc_tso_prepare_header(txq, in_seg, in_off, idx, header_len); + tsoh = txq->sw_ring[idx & txq->ptr_mask].tsoh; + + header_paddr = rte_malloc_virt2phy((void *)tsoh); + } else { + if (m->data_len == header_len) { + *in_off = 0; + *in_seg = m->next; + } else { + *in_off = header_len; + } + + tsoh = rte_pktmbuf_mtod(m, uint8_t *); + } + + /* Handle IP header */ + if (m->ol_flags & PKT_TX_IPV4) { + const struct ipv4_hdr *iphe4; + + iphe4 = (const struct ipv4_hdr *)(tsoh + nh_off); + rte_memcpy(&packet_id, &iphe4->packet_id, sizeof(uint16_t)); + packet_id = rte_be_to_cpu_16(packet_id); + } else if (m->ol_flags & PKT_TX_IPV6) { + packet_id = 0; + } else { + return EINVAL; + } + + /* Handle TCP header */ + th = (const struct tcp_hdr *)(tsoh + tcph_off); + + rte_memcpy(&sent_seq, &th->sent_seq, sizeof(uint32_t)); + sent_seq = rte_be_to_cpu_32(sent_seq); + + efx_tx_qdesc_tso2_create(txq->common, packet_id, sent_seq, m->tso_segsz, + *pend, EFX_TX_FATSOV2_OPT_NDESCS); + + *pend += EFX_TX_FATSOV2_OPT_NDESCS; + *pkt_descs += EFX_TX_FATSOV2_OPT_NDESCS; + + efx_tx_qdesc_dma_create(txq->common, header_paddr, header_len, + B_FALSE, (*pend)++); + (*pkt_descs)++; + *pkt_len -= header_len; + + return 0; +} diff --git a/drivers/net/sfc/sfc_tx.c b/drivers/net/sfc/sfc_tx.c index 86bcfec5c0..3e64c0f0c2 100644 --- a/drivers/net/sfc/sfc_tx.c +++ b/drivers/net/sfc/sfc_tx.c @@ -184,6 +184,13 @@ sfc_tx_qinit(struct sfc_adapter *sa, unsigned int sw_index, if (txq->sw_ring == NULL) goto fail_desc_alloc; + if (sa->tso) { + rc = sfc_tso_alloc_tsoh_objs(txq->sw_ring, txq_info->entries, + socket_id); + if (rc != 0) + goto fail_alloc_tsoh_objs; + } + txq->state = SFC_TXQ_INITIALIZED; txq->ptr_mask = txq_info->entries - 1; txq->free_thresh = (tx_conf->tx_free_thresh) ? 
tx_conf->tx_free_thresh : @@ -199,6 +206,9 @@ sfc_tx_qinit(struct sfc_adapter *sa, unsigned int sw_index, return 0; +fail_alloc_tsoh_objs: + rte_free(txq->sw_ring); + fail_desc_alloc: rte_free(txq->pend_desc); @@ -234,6 +244,8 @@ sfc_tx_qfini(struct sfc_adapter *sa, unsigned int sw_index) SFC_ASSERT(txq != NULL); SFC_ASSERT(txq->state == SFC_TXQ_INITIALIZED); + sfc_tso_free_tsoh_objs(txq->sw_ring, txq_info->entries); + txq_info->txq = NULL; txq_info->entries = 0; @@ -300,6 +312,11 @@ sfc_tx_init(struct sfc_adapter *sa) sa->txq_count = sa->eth_dev->data->nb_tx_queues; + if (sa->tso) + sa->txq_count = MIN(sa->txq_count, + efx_nic_cfg_get(sa->nic)->enc_fw_assisted_tso_v2_n_contexts / + efx_nic_cfg_get(sa->nic)->enc_hw_pf_count); + sa->txq_info = rte_calloc_socket("sfc-txqs", sa->txq_count, sizeof(sa->txq_info[0]), 0, sa->socket_id); @@ -373,17 +390,25 @@ sfc_tx_qstart(struct sfc_adapter *sa, unsigned int sw_index) * hence, we always enable it here */ if ((txq->flags & ETH_TXQ_FLAGS_NOXSUMTCP) || - (txq->flags & ETH_TXQ_FLAGS_NOXSUMUDP)) + (txq->flags & ETH_TXQ_FLAGS_NOXSUMUDP)) { flags = EFX_TXQ_CKSUM_IPV4; - else + } else { flags = EFX_TXQ_CKSUM_IPV4 | EFX_TXQ_CKSUM_TCPUDP; + if (sa->tso) + flags |= EFX_TXQ_FATSOV2; + } + rc = efx_tx_qcreate(sa->nic, sw_index, 0, &txq->mem, txq_info->entries, 0 /* not used on EF10 */, flags, evq->common, &txq->common, &desc_index); - if (rc != 0) + if (rc != 0) { + if (sa->tso && (rc == ENOSPC)) + sfc_err(sa, "ran out of TSO contexts"); + goto fail_tx_qcreate; + } txq->added = txq->pending = txq->completed = desc_index; txq->hw_vlan_tci = 0; @@ -494,6 +519,13 @@ sfc_tx_start(struct sfc_adapter *sa) sfc_log_init(sa, "txq_count = %u", sa->txq_count); + if (sa->tso) { + if (!efx_nic_cfg_get(sa->nic)->enc_fw_assisted_tso_v2_enabled) { + sfc_warn(sa, "TSO support was unable to be restored"); + sa->tso = B_FALSE; + } + } + rc = efx_tx_init(sa->nic); if (rc != 0) goto fail_efx_tx_init; @@ -607,6 +639,7 @@ sfc_xmit_pkts(void *tx_queue, 
struct rte_mbuf **tx_pkts, uint16_t nb_pkts) struct rte_mbuf *m_seg = *pktp; size_t pkt_len = m_seg->pkt_len; unsigned int pkt_descs = 0; + size_t in_off = 0; /* * Here VLAN TCI is expected to be zero in case if no @@ -617,6 +650,46 @@ sfc_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) */ pkt_descs += sfc_tx_maybe_insert_tag(txq, m_seg, &pend); +#ifdef RTE_LIBRTE_SFC_EFX_TSO + if (m_seg->ol_flags & PKT_TX_TCP_SEG) { + /* + * We expect the 'pkt->l[2, 3, 4]_len' values + * to be set correctly by the caller + */ + if (sfc_tso_do(txq, added, &m_seg, &in_off, &pend, + &pkt_descs, &pkt_len) != 0) { + /* We may have reached this place for + * one of the following reasons: + * + * 1) Packet header length is greater + * than SFC_TSOH_STD_LEN + * 2) TCP header starts at more than + * 208 bytes into the frame + * + * We will deceive RTE saying that we have sent + * the packet, but we will actually drop it. + * Hence, we should revert 'pend' to the + * previous state (in case we have added + * a VLAN descriptor) and start processing + * the next packet. But the original + * mbuf shouldn't be orphaned + */ + pend -= pkt_descs; + + rte_pktmbuf_free(*pktp); + + continue; + } + + /* + * We've only added 2 FATSOv2 option descriptors + * and 1 descriptor for the linearized packet header. 
+ * The outstanding work will be done in the same manner + * as for the usual non-TSO path + */ + } +#endif /* RTE_LIBRTE_SFC_EFX_TSO */ + for (; m_seg != NULL; m_seg = m_seg->next) { efsys_dma_addr_t next_frag; size_t seg_len; @@ -624,6 +697,16 @@ sfc_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) seg_len = m_seg->data_len; next_frag = rte_mbuf_data_dma_addr(m_seg); + /* + * If we've started a TSO transaction a few steps earlier, + * we'll skip the packet header using an offset in the + * current segment (which has been set to the + * first one containing payload) + */ + seg_len -= in_off; + next_frag += in_off; + in_off = 0; + do { efsys_dma_addr_t frag_addr = next_frag; size_t frag_len; diff --git a/drivers/net/sfc/sfc_tx.h b/drivers/net/sfc/sfc_tx.h index 4d25c6a19e..581e2aa6bd 100644 --- a/drivers/net/sfc/sfc_tx.h +++ b/drivers/net/sfc/sfc_tx.h @@ -50,6 +50,9 @@ struct sfc_evq; struct sfc_tx_sw_desc { struct rte_mbuf *mbuf; +#ifdef RTE_LIBRTE_SFC_EFX_TSO + uint8_t *tsoh; /* Buffer to store TSO header */ +#endif /* RTE_LIBRTE_SFC_EFX_TSO */ }; enum sfc_txq_state_bit { @@ -113,6 +116,31 @@ void sfc_tx_stop(struct sfc_adapter *sa); uint16_t sfc_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +#ifdef RTE_LIBRTE_SFC_EFX_TSO +/* From 'sfc_tso.c' */ +int sfc_tso_alloc_tsoh_objs(struct sfc_tx_sw_desc *sw_ring, + unsigned int txq_entries, unsigned int socket_id); +void sfc_tso_free_tsoh_objs(struct sfc_tx_sw_desc *sw_ring, + unsigned int txq_entries); +int sfc_tso_do(struct sfc_txq *txq, unsigned int idx, struct rte_mbuf **in_seg, + size_t *in_off, efx_desc_t **pend, unsigned int *pkt_descs, + size_t *pkt_len); +#else /* !RTE_LIBRTE_SFC_EFX_TSO */ +static inline int +sfc_tso_alloc_tsoh_objs(__rte_unused struct sfc_tx_sw_desc *sw_ring, + __rte_unused unsigned int txq_entries, + __rte_unused unsigned int socket_id) +{ + return 0; +} + +static inline void +sfc_tso_free_tsoh_objs(__rte_unused struct sfc_tx_sw_desc *sw_ring, + 
__rte_unused unsigned int txq_entries) +{ +} +#endif /* RTE_LIBRTE_SFC_EFX_TSO */ + #ifdef __cplusplus } #endif