X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fnet%2Fsfc%2Fsfc_ef100_tx.c;h=9eb42c34d9054db10beed52c87d5e955010cbdc6;hb=f30e69b41f949cd4a9afb6ff39de196e661708e2;hp=7ef085846cdf2ee56817c6519e76f4f973f1a338;hpb=0cb551b690c1c52de3e35ea67c502ddf4477a62b;p=dpdk.git diff --git a/drivers/net/sfc/sfc_ef100_tx.c b/drivers/net/sfc/sfc_ef100_tx.c index 7ef085846c..9eb42c34d9 100644 --- a/drivers/net/sfc/sfc_ef100_tx.c +++ b/drivers/net/sfc/sfc_ef100_tx.c @@ -11,6 +11,7 @@ #include #include +#include #include "efx.h" #include "efx_types.h" @@ -36,6 +37,10 @@ #define SFC_EF100_TX_SEND_DESC_LEN_MAX \ ((1u << ESF_GZ_TX_SEND_LEN_WIDTH) - 1) +/** Maximum length of the segment descriptor data */ +#define SFC_EF100_TX_SEG_DESC_LEN_MAX \ + ((1u << ESF_GZ_TX_SEG_LEN_WIDTH) - 1) + /** * Maximum number of descriptors/buffers in the Tx ring. * It should guarantee that corresponding event queue never overfill. @@ -72,6 +77,13 @@ struct sfc_ef100_txq { unsigned int evq_phase_bit_shift; volatile efx_qword_t *evq_hw_ring; + uint16_t tso_tcp_header_offset_limit; + uint16_t tso_max_nb_header_descs; + uint16_t tso_max_header_len; + uint16_t tso_max_nb_payload_descs; + uint32_t tso_max_payload_len; + uint32_t tso_max_nb_outgoing_frames; + /* Datapath transmit queue anchor */ struct sfc_dp_txq dp; }; @@ -82,6 +94,120 @@ sfc_ef100_txq_by_dp_txq(struct sfc_dp_txq *dp_txq) return container_of(dp_txq, struct sfc_ef100_txq, dp); } +static int +sfc_ef100_tx_prepare_pkt_tso(struct sfc_ef100_txq * const txq, + struct rte_mbuf *m) +{ + size_t header_len = ((m->ol_flags & PKT_TX_TUNNEL_MASK) ? + m->outer_l2_len + m->outer_l3_len : 0) + + m->l2_len + m->l3_len + m->l4_len; + size_t payload_len = m->pkt_len - header_len; + unsigned long mss_conformant_max_payload_len; + unsigned int nb_payload_descs; + +#ifdef RTE_LIBRTE_SFC_EFX_DEBUG + switch (m->ol_flags & PKT_TX_TUNNEL_MASK) { + case 0: + /* FALLTHROUGH */ + case PKT_TX_TUNNEL_VXLAN: + /* FALLTHROUGH */ + case PKT_TX_TUNNEL_GENEVE: + break; + default: + return ENOTSUP; + } +#endif + + mss_conformant_max_payload_len = + m->tso_segsz * txq->tso_max_nb_outgoing_frames; + + /* + * Don't really want to know exact number of payload segments. + * Just use total number of segments as upper limit. Practically + * maximum number of payload segments is significantly bigger + * than maximum number header segments, so we can neglect header + * segments excluded total number of segments to estimate number + * of payload segments required. + */ + nb_payload_descs = m->nb_segs; + + /* + * Carry out multiple independent checks using bitwise OR + * to avoid unnecessary conditional branching. + */ + if (unlikely((header_len > txq->tso_max_header_len) | + (nb_payload_descs > txq->tso_max_nb_payload_descs) | + (payload_len > txq->tso_max_payload_len) | + (payload_len > mss_conformant_max_payload_len) | + (m->pkt_len == header_len))) + return EINVAL; + + return 0; +} + +static uint16_t +sfc_ef100_tx_prepare_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + struct sfc_ef100_txq * const txq = sfc_ef100_txq_by_dp_txq(tx_queue); + uint16_t i; + + for (i = 0; i < nb_pkts; i++) { + struct rte_mbuf *m = tx_pkts[i]; + unsigned int max_nb_header_segs = 0; + bool calc_phdr_cksum = false; + int ret; + + /* + * Partial checksum offload is used in the case of + * inner TCP/UDP checksum offload. It requires + * pseudo-header checksum which is calculated below, + * but requires contiguous packet headers. 
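+		 * (The pseudo-header checksum covers the IP addresses,
+		 * the L4 protocol and the L4 length.)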
+ */ + if ((m->ol_flags & PKT_TX_TUNNEL_MASK) && + (m->ol_flags & PKT_TX_L4_MASK)) { + calc_phdr_cksum = true; + max_nb_header_segs = 1; + } else if (m->ol_flags & PKT_TX_TCP_SEG) { + max_nb_header_segs = txq->tso_max_nb_header_descs; + } + + ret = sfc_dp_tx_prepare_pkt(m, max_nb_header_segs, 0, + txq->tso_tcp_header_offset_limit, + txq->max_fill_level, 1, 0); + if (unlikely(ret != 0)) { + rte_errno = ret; + break; + } + + if (m->ol_flags & PKT_TX_TCP_SEG) { + ret = sfc_ef100_tx_prepare_pkt_tso(txq, m); + if (unlikely(ret != 0)) { + rte_errno = ret; + break; + } + } else if (m->nb_segs > EFX_MASK32(ESF_GZ_TX_SEND_NUM_SEGS)) { + rte_errno = EINVAL; + break; + } + + if (calc_phdr_cksum) { + /* + * Full checksum offload does IPv4 header checksum + * and does not require any assistance. + */ + ret = rte_net_intel_cksum_flags_prepare(m, + m->ol_flags & ~PKT_TX_IP_CKSUM); + if (unlikely(ret != 0)) { + rte_errno = -ret; + break; + } + } + } + + return i; +} + static bool sfc_ef100_tx_get_event(struct sfc_ef100_txq *txq, efx_qword_t *ev) { @@ -183,14 +309,168 @@ sfc_ef100_tx_reap(struct sfc_ef100_txq *txq) sfc_ef100_tx_reap_num_descs(txq, sfc_ef100_tx_process_events(txq)); } +static uint8_t +sfc_ef100_tx_qdesc_cso_inner_l3(uint64_t tx_tunnel) +{ + uint8_t inner_l3; + + switch (tx_tunnel) { + case PKT_TX_TUNNEL_VXLAN: + inner_l3 = ESE_GZ_TX_DESC_CS_INNER_L3_VXLAN; + break; + case PKT_TX_TUNNEL_GENEVE: + inner_l3 = ESE_GZ_TX_DESC_CS_INNER_L3_GENEVE; + break; + default: + inner_l3 = ESE_GZ_TX_DESC_CS_INNER_L3_OFF; + break; + } + return inner_l3; +} + static void sfc_ef100_tx_qdesc_send_create(const struct rte_mbuf *m, efx_oword_t *tx_desc) { - EFX_POPULATE_OWORD_4(*tx_desc, + bool outer_l3; + bool outer_l4; + uint8_t inner_l3; + uint8_t partial_en; + uint16_t part_cksum_w; + uint16_t l4_offset_w; + + if ((m->ol_flags & PKT_TX_TUNNEL_MASK) == 0) { + outer_l3 = (m->ol_flags & PKT_TX_IP_CKSUM); + outer_l4 = (m->ol_flags & PKT_TX_L4_MASK); + inner_l3 = ESE_GZ_TX_DESC_CS_INNER_L3_OFF; + partial_en = ESE_GZ_TX_DESC_CSO_PARTIAL_EN_OFF; + part_cksum_w = 0; + l4_offset_w = 0; + } else { + outer_l3 = (m->ol_flags & PKT_TX_OUTER_IP_CKSUM); + outer_l4 = (m->ol_flags & PKT_TX_OUTER_UDP_CKSUM); + inner_l3 = sfc_ef100_tx_qdesc_cso_inner_l3(m->ol_flags & + PKT_TX_TUNNEL_MASK); + + switch (m->ol_flags & PKT_TX_L4_MASK) { + case PKT_TX_TCP_CKSUM: + partial_en = ESE_GZ_TX_DESC_CSO_PARTIAL_EN_TCP; + part_cksum_w = offsetof(struct rte_tcp_hdr, cksum) >> 1; + break; + case PKT_TX_UDP_CKSUM: + partial_en = ESE_GZ_TX_DESC_CSO_PARTIAL_EN_UDP; + part_cksum_w = offsetof(struct rte_udp_hdr, + dgram_cksum) >> 1; + break; + default: + partial_en = ESE_GZ_TX_DESC_CSO_PARTIAL_EN_OFF; + part_cksum_w = 0; + break; + } + l4_offset_w = (m->outer_l2_len + m->outer_l3_len + + m->l2_len + m->l3_len) >> 1; + } + + EFX_POPULATE_OWORD_10(*tx_desc, ESF_GZ_TX_SEND_ADDR, rte_mbuf_data_iova(m), ESF_GZ_TX_SEND_LEN, rte_pktmbuf_data_len(m), - ESF_GZ_TX_SEND_NUM_SEGS, 1, + ESF_GZ_TX_SEND_NUM_SEGS, m->nb_segs, + ESF_GZ_TX_SEND_CSO_PARTIAL_START_W, l4_offset_w, + ESF_GZ_TX_SEND_CSO_PARTIAL_CSUM_W, part_cksum_w, + ESF_GZ_TX_SEND_CSO_PARTIAL_EN, partial_en, + ESF_GZ_TX_SEND_CSO_INNER_L3, inner_l3, + ESF_GZ_TX_SEND_CSO_OUTER_L3, outer_l3, + ESF_GZ_TX_SEND_CSO_OUTER_L4, outer_l4, ESF_GZ_TX_DESC_TYPE, ESE_GZ_TX_DESC_TYPE_SEND); + + if (m->ol_flags & PKT_TX_VLAN_PKT) { + efx_oword_t tx_desc_extra_fields; + + EFX_POPULATE_OWORD_2(tx_desc_extra_fields, + ESF_GZ_TX_SEND_VLAN_INSERT_EN, 1, + ESF_GZ_TX_SEND_VLAN_INSERT_TCI, m->vlan_tci); + + 
EFX_OR_OWORD(*tx_desc, tx_desc_extra_fields); + } +} + +static void +sfc_ef100_tx_qdesc_seg_create(rte_iova_t addr, uint16_t len, + efx_oword_t *tx_desc) +{ + EFX_POPULATE_OWORD_3(*tx_desc, + ESF_GZ_TX_SEG_ADDR, addr, + ESF_GZ_TX_SEG_LEN, len, + ESF_GZ_TX_DESC_TYPE, ESE_GZ_TX_DESC_TYPE_SEG); +} + +static void +sfc_ef100_tx_qdesc_tso_create(const struct rte_mbuf *m, + uint16_t nb_header_descs, + uint16_t nb_payload_descs, + size_t header_len, size_t payload_len, + size_t outer_iph_off, size_t outer_udph_off, + size_t iph_off, size_t tcph_off, + efx_oword_t *tx_desc) +{ + efx_oword_t tx_desc_extra_fields; + int ed_outer_udp_len = (outer_udph_off != 0) ? 1 : 0; + int ed_outer_ip_len = (outer_iph_off != 0) ? 1 : 0; + int ed_outer_ip_id = (outer_iph_off != 0) ? + ESE_GZ_TX_DESC_IP4_ID_INC_MOD16 : 0; + /* + * If no tunnel encapsulation is present, then the ED_INNER + * fields should be used. + */ + int ed_inner_ip_id = ESE_GZ_TX_DESC_IP4_ID_INC_MOD16; + uint8_t inner_l3 = sfc_ef100_tx_qdesc_cso_inner_l3( + m->ol_flags & PKT_TX_TUNNEL_MASK); + + EFX_POPULATE_OWORD_10(*tx_desc, + ESF_GZ_TX_TSO_MSS, m->tso_segsz, + ESF_GZ_TX_TSO_HDR_NUM_SEGS, nb_header_descs, + ESF_GZ_TX_TSO_PAYLOAD_NUM_SEGS, nb_payload_descs, + ESF_GZ_TX_TSO_ED_OUTER_IP4_ID, ed_outer_ip_id, + ESF_GZ_TX_TSO_ED_INNER_IP4_ID, ed_inner_ip_id, + ESF_GZ_TX_TSO_ED_OUTER_IP_LEN, ed_outer_ip_len, + ESF_GZ_TX_TSO_ED_INNER_IP_LEN, 1, + ESF_GZ_TX_TSO_ED_OUTER_UDP_LEN, ed_outer_udp_len, + ESF_GZ_TX_TSO_HDR_LEN_W, header_len >> 1, + ESF_GZ_TX_TSO_PAYLOAD_LEN, payload_len); + + EFX_POPULATE_OWORD_9(tx_desc_extra_fields, + /* + * Outer offsets are required for outer IPv4 ID + * and length edits in the case of tunnel TSO. + */ + ESF_GZ_TX_TSO_OUTER_L3_OFF_W, outer_iph_off >> 1, + ESF_GZ_TX_TSO_OUTER_L4_OFF_W, outer_udph_off >> 1, + /* + * Inner offsets are required for inner IPv4 ID + * and IP length edits and partial checksum + * offload in the case of tunnel TSO. + */ + ESF_GZ_TX_TSO_INNER_L3_OFF_W, iph_off >> 1, + ESF_GZ_TX_TSO_INNER_L4_OFF_W, tcph_off >> 1, + ESF_GZ_TX_TSO_CSO_INNER_L4, + inner_l3 != ESE_GZ_TX_DESC_CS_INNER_L3_OFF, + ESF_GZ_TX_TSO_CSO_INNER_L3, inner_l3, + /* + * Use outer full checksum offloads which do + * not require any extra information. + */ + ESF_GZ_TX_TSO_CSO_OUTER_L3, 1, + ESF_GZ_TX_TSO_CSO_OUTER_L4, 1, + ESF_GZ_TX_DESC_TYPE, ESE_GZ_TX_DESC_TYPE_TSO); + + EFX_OR_OWORD(*tx_desc, tx_desc_extra_fields); + + if (m->ol_flags & PKT_TX_VLAN_PKT) { + EFX_POPULATE_OWORD_2(tx_desc_extra_fields, + ESF_GZ_TX_TSO_VLAN_INSERT_EN, 1, + ESF_GZ_TX_TSO_VLAN_INSERT_TCI, m->vlan_tci); + + EFX_OR_OWORD(*tx_desc, tx_desc_extra_fields); + } } static inline void @@ -218,21 +498,125 @@ sfc_ef100_tx_qpush(struct sfc_ef100_txq *txq, unsigned int added) static unsigned int sfc_ef100_tx_pkt_descs_max(const struct rte_mbuf *m) { + unsigned int extra_descs = 0; + /** Maximum length of an mbuf segment data */ #define SFC_MBUF_SEG_LEN_MAX UINT16_MAX RTE_BUILD_BUG_ON(sizeof(m->data_len) != 2); + if (m->ol_flags & PKT_TX_TCP_SEG) { + /* Tx TSO descriptor */ + extra_descs++; + /* + * Extra Tx segment descriptor may be required if header + * ends in the middle of segment. + */ + extra_descs++; + } else { + /* + * mbuf segment cannot be bigger than maximum segment length + * and maximum packet length since TSO is not supported yet. + * Make sure that the first segment does not need fragmentation + * (split into many Tx descriptors). 
+ */ + RTE_BUILD_BUG_ON(SFC_EF100_TX_SEND_DESC_LEN_MAX < + RTE_MIN((unsigned int)EFX_MAC_PDU_MAX, + SFC_MBUF_SEG_LEN_MAX)); + } + + /* + * Any segment of scattered packet cannot be bigger than maximum + * segment length. Make sure that subsequent segments do not need + * fragmentation (split into many Tx descriptors). + */ + RTE_BUILD_BUG_ON(SFC_EF100_TX_SEG_DESC_LEN_MAX < SFC_MBUF_SEG_LEN_MAX); + + return m->nb_segs + extra_descs; +} + +static struct rte_mbuf * +sfc_ef100_xmit_tso_pkt(struct sfc_ef100_txq * const txq, + struct rte_mbuf *m, unsigned int *added) +{ + struct rte_mbuf *m_seg = m; + unsigned int nb_hdr_descs; + unsigned int nb_pld_descs; + unsigned int seg_split = 0; + unsigned int tso_desc_id; + unsigned int id; + size_t outer_iph_off; + size_t outer_udph_off; + size_t iph_off; + size_t tcph_off; + size_t header_len; + size_t remaining_hdr_len; + + if (m->ol_flags & PKT_TX_TUNNEL_MASK) { + outer_iph_off = m->outer_l2_len; + outer_udph_off = outer_iph_off + m->outer_l3_len; + } else { + outer_iph_off = 0; + outer_udph_off = 0; + } + iph_off = outer_udph_off + m->l2_len; + tcph_off = iph_off + m->l3_len; + header_len = tcph_off + m->l4_len; + + /* + * Remember ID of the TX_TSO descriptor to be filled in. + * We can't fill it in right now since we need to calculate + * number of header and payload segments first and don't want + * to traverse it twice here. + */ + tso_desc_id = (*added)++ & txq->ptr_mask; + + remaining_hdr_len = header_len; + do { + id = (*added)++ & txq->ptr_mask; + if (rte_pktmbuf_data_len(m_seg) <= remaining_hdr_len) { + /* The segment is fully header segment */ + sfc_ef100_tx_qdesc_seg_create( + rte_mbuf_data_iova(m_seg), + rte_pktmbuf_data_len(m_seg), + &txq->txq_hw_ring[id]); + remaining_hdr_len -= rte_pktmbuf_data_len(m_seg); + } else { + /* + * The segment must be split into header and + * payload segments + */ + sfc_ef100_tx_qdesc_seg_create( + rte_mbuf_data_iova(m_seg), + remaining_hdr_len, + &txq->txq_hw_ring[id]); + SFC_ASSERT(txq->sw_ring[id].mbuf == NULL); + + id = (*added)++ & txq->ptr_mask; + sfc_ef100_tx_qdesc_seg_create( + rte_mbuf_data_iova(m_seg) + remaining_hdr_len, + rte_pktmbuf_data_len(m_seg) - remaining_hdr_len, + &txq->txq_hw_ring[id]); + remaining_hdr_len = 0; + seg_split = 1; + } + txq->sw_ring[id].mbuf = m_seg; + m_seg = m_seg->next; + } while (remaining_hdr_len > 0); + /* - * mbuf segment cannot be bigger than maximum segment length and - * maximum packet length since TSO is not supported yet. - * Make sure that the first segment does not need fragmentation - * (split into many Tx descriptors). + * If a segment is split into header and payload segments, added + * pointer counts it twice and we should correct it. 
*/ - RTE_BUILD_BUG_ON(SFC_EF100_TX_SEND_DESC_LEN_MAX < - RTE_MIN((unsigned int)EFX_MAC_PDU_MAX, SFC_MBUF_SEG_LEN_MAX)); + nb_hdr_descs = ((id - tso_desc_id) & txq->ptr_mask) - seg_split; + nb_pld_descs = m->nb_segs - nb_hdr_descs + seg_split; - SFC_ASSERT(m->nb_segs == 1); - return 1; + sfc_ef100_tx_qdesc_tso_create(m, nb_hdr_descs, nb_pld_descs, header_len, + rte_pktmbuf_pkt_len(m) - header_len, + outer_iph_off, outer_udph_off, + iph_off, tcph_off, + &txq->txq_hw_ring[tso_desc_id]); + + return m_seg; } static uint16_t @@ -286,25 +670,43 @@ sfc_ef100_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) break; } - id = added++ & txq->ptr_mask; - sfc_ef100_tx_qdesc_send_create(m_seg, &txq->txq_hw_ring[id]); + if (m_seg->ol_flags & PKT_TX_TCP_SEG) { + m_seg = sfc_ef100_xmit_tso_pkt(txq, m_seg, &added); + } else { + id = added++ & txq->ptr_mask; + sfc_ef100_tx_qdesc_send_create(m_seg, + &txq->txq_hw_ring[id]); + + /* + * rte_pktmbuf_free() is commonly used in DPDK for + * recycling packets - the function checks every + * segment's reference counter and returns the + * buffer to its pool whenever possible; + * nevertheless, freeing mbuf segments one by one + * may entail some performance decline; + * from this point, sfc_efx_tx_reap() does the same job + * on its own and frees buffers in bulks (all mbufs + * within a bulk belong to the same pool); + * from this perspective, individual segment pointers + * must be associated with the corresponding SW + * descriptors independently so that only one loop + * is sufficient on reap to inspect all the buffers + */ + txq->sw_ring[id].mbuf = m_seg; + m_seg = m_seg->next; + } - /* - * rte_pktmbuf_free() is commonly used in DPDK for - * recycling packets - the function checks every - * segment's reference counter and returns the - * buffer to its pool whenever possible; - * nevertheless, freeing mbuf segments one by one - * may entail some performance decline; - * from this point, sfc_efx_tx_reap() does the same job - * on its own and frees buffers in bulks (all mbufs - * within a bulk belong to the same pool); - * from this perspective, individual segment pointers - * must be associated with the corresponding SW - * descriptors independently so that only one loop - * is sufficient on reap to inspect all the buffers - */ - txq->sw_ring[id].mbuf = m_seg; + while (m_seg != NULL) { + RTE_BUILD_BUG_ON(SFC_MBUF_SEG_LEN_MAX > + SFC_EF100_TX_SEG_DESC_LEN_MAX); + + id = added++ & txq->ptr_mask; + sfc_ef100_tx_qdesc_seg_create(rte_mbuf_data_iova(m_seg), + rte_pktmbuf_data_len(m_seg), + &txq->txq_hw_ring[id]); + txq->sw_ring[id].mbuf = m_seg; + m_seg = m_seg->next; + } dma_desc_space -= (added - pkt_start); } @@ -399,6 +801,13 @@ sfc_ef100_tx_qcreate(uint16_t port_id, uint16_t queue_id, (info->hw_index << info->vi_window_shift); txq->evq_hw_ring = info->evq_hw_ring; + txq->tso_tcp_header_offset_limit = info->tso_tcp_header_offset_limit; + txq->tso_max_nb_header_descs = info->tso_max_nb_header_descs; + txq->tso_max_header_len = info->tso_max_header_len; + txq->tso_max_nb_payload_descs = info->tso_max_nb_payload_descs; + txq->tso_max_payload_len = info->tso_max_payload_len; + txq->tso_max_nb_outgoing_frames = info->tso_max_nb_outgoing_frames; + sfc_ef100_tx_debug(txq, "TxQ doorbell is %p", txq->doorbell); *dp_txqp = &txq->dp; @@ -532,7 +941,16 @@ struct sfc_dp_tx sfc_ef100_tx = { }, .features = SFC_DP_TX_FEAT_MULTI_PROCESS, .dev_offload_capa = 0, - .queue_offload_capa = 0, + .queue_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT | + 
DEV_TX_OFFLOAD_IPV4_CKSUM | + DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM | + DEV_TX_OFFLOAD_OUTER_UDP_CKSUM | + DEV_TX_OFFLOAD_UDP_CKSUM | + DEV_TX_OFFLOAD_TCP_CKSUM | + DEV_TX_OFFLOAD_MULTI_SEGS | + DEV_TX_OFFLOAD_TCP_TSO | + DEV_TX_OFFLOAD_VXLAN_TNL_TSO | + DEV_TX_OFFLOAD_GENEVE_TNL_TSO, .get_dev_info = sfc_ef100_get_dev_info, .qsize_up_rings = sfc_ef100_tx_qsize_up_rings, .qcreate = sfc_ef100_tx_qcreate, @@ -542,5 +960,6 @@ struct sfc_dp_tx sfc_ef100_tx = { .qstop = sfc_ef100_tx_qstop, .qreap = sfc_ef100_tx_qreap, .qdesc_status = sfc_ef100_tx_qdesc_status, + .pkt_prepare = sfc_ef100_tx_prepare_pkts, .pkt_burst = sfc_ef100_xmit_pkts, };
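
To make the limit checks in sfc_ef100_tx_prepare_pkt_tso() concrete, the standalone sketch below repeats the same arithmetic with a made-up tso_max_nb_outgoing_frames limit; the real tso_max_* values arrive through the info argument of sfc_ef100_tx_qcreate() above. This is illustrative code, not part of the driver.

/*
 * Standalone sketch of the check done in sfc_ef100_tx_prepare_pkt_tso();
 * tso_max_nb_outgoing_frames here is an assumed limit, not a real
 * hardware number.
 */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	/* Plain (non-tunnel) Ethernet/IPv4/TCP TSO packet */
	uint32_t l2_len = 14, l3_len = 20, l4_len = 20;
	uint32_t pkt_len = 64054;			/* 54 bytes of headers + 64000 bytes of payload */
	uint16_t tso_segsz = 1400;			/* MSS requested via the mbuf */
	uint32_t tso_max_nb_outgoing_frames = 32;	/* assumed adapter limit */

	uint32_t header_len = l2_len + l3_len + l4_len;			/* 54 */
	uint32_t payload_len = pkt_len - header_len;			/* 64000 */
	uint64_t mss_conformant_max_payload_len =
		(uint64_t)tso_segsz * tso_max_nb_outgoing_frames;	/* 44800 */

	/* 64000 > 44800: the driver would reject this mbuf with EINVAL */
	printf("reject: %s\n",
	       payload_len > mss_conformant_max_payload_len ? "yes" : "no");
	return 0;
}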
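
The partial checksum path in sfc_ef100_tx_qdesc_send_create() expresses both the L4 start offset and the checksum field location in 16-bit words. The small computation below illustrates this for a VXLAN-encapsulated inner IPv4/TCP packet; the header sizes are assumptions for the example, and it relies on the DPDK convention that l2_len of a tunnel packet spans the outer UDP and VXLAN headers plus the inner Ethernet header.

/*
 * Illustrative computation of the CSO_PARTIAL_START_W / CSO_PARTIAL_CSUM_W
 * inputs for a VXLAN-encapsulated inner IPv4/TCP packet; header sizes are
 * example assumptions only.
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#include <rte_tcp.h>

int
main(void)
{
	uint32_t outer_l2_len = 14;	/* outer Ethernet */
	uint32_t outer_l3_len = 20;	/* outer IPv4 */
	uint32_t l2_len = 8 + 8 + 14;	/* outer UDP + VXLAN + inner Ethernet */
	uint32_t l3_len = 20;		/* inner IPv4 */

	/* Start of the inner TCP header in 16-bit words: (14 + 20 + 30 + 20) / 2 = 42 */
	unsigned int l4_offset_w =
		(outer_l2_len + outer_l3_len + l2_len + l3_len) >> 1;

	/* Inner TCP checksum field location in 16-bit words: 16 / 2 = 8 */
	unsigned int part_cksum_w = offsetof(struct rte_tcp_hdr, cksum) >> 1;

	printf("l4_offset_w=%u part_cksum_w=%u\n", l4_offset_w, part_cksum_w);
	return 0;
}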
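
On the application side, the new pkt_prepare callback means TSO packets are expected to pass through rte_eth_tx_prepare() before rte_eth_tx_burst(). The hypothetical helper below (send_tso_burst() is not part of DPDK or of this patch) shows the mbuf metadata a caller would set for plain IPv4/TCP TSO; the MSS value is illustrative and the packet headers are assumed to be built already.

/*
 * Hypothetical application-side helper: sets the mbuf metadata expected
 * for plain IPv4/TCP TSO, then runs the burst through tx_prepare, which
 * ends up in sfc_ef100_tx_prepare_pkts(), and tx_burst, which ends up in
 * sfc_ef100_xmit_pkts().
 */
#include <rte_ethdev.h>
#include <rte_ether.h>
#include <rte_ip.h>
#include <rte_mbuf.h>
#include <rte_tcp.h>

static uint16_t
send_tso_burst(uint16_t port_id, uint16_t queue_id,
	       struct rte_mbuf **pkts, uint16_t nb_pkts)
{
	uint16_t nb_prep;
	uint16_t i;

	for (i = 0; i != nb_pkts; i++) {
		struct rte_mbuf *m = pkts[i];

		m->ol_flags |= PKT_TX_TCP_SEG | PKT_TX_IPV4 | PKT_TX_IP_CKSUM;
		m->l2_len = sizeof(struct rte_ether_hdr);
		m->l3_len = sizeof(struct rte_ipv4_hdr);
		m->l4_len = sizeof(struct rte_tcp_hdr);
		m->tso_segsz = 1400;	/* illustrative MSS */
	}

	/* Header/limit checks and pseudo-header checksum preparation */
	nb_prep = rte_eth_tx_prepare(port_id, queue_id, pkts, nb_pkts);

	/* TSO, SEG and SEND descriptors are written from here on */
	return rte_eth_tx_burst(port_id, queue_id, pkts, nb_prep);
}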