X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fnet%2Fsfc%2Fsfc_ef10_tx.c;h=12387972669e20707d9da89eee2e6a9d63f3aa24;hb=36efba2f93c45b18cf21642034517206d4784d42;hp=74bd82203e8b9eaa9e5f94531c4293125ad39623;hpb=5688520075b5cdbcb568994b2d1361aeab565986;p=dpdk.git diff --git a/drivers/net/sfc/sfc_ef10_tx.c b/drivers/net/sfc/sfc_ef10_tx.c index 74bd82203e..1238797266 100644 --- a/drivers/net/sfc/sfc_ef10_tx.c +++ b/drivers/net/sfc/sfc_ef10_tx.c @@ -1,32 +1,10 @@ -/*- - * BSD LICENSE +/* SPDX-License-Identifier: BSD-3-Clause * - * Copyright (c) 2016 Solarflare Communications Inc. + * Copyright (c) 2016-2018 Solarflare Communications Inc. * All rights reserved. * * This software was jointly developed between OKTET Labs (under contract * for Solarflare) and Solarflare Communications, Inc. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, - * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR - * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, - * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include @@ -47,19 +25,10 @@ #define sfc_ef10_tx_err(dpq, ...) \ SFC_DP_LOG(SFC_KVARG_DATAPATH_EF10, ERR, dpq, __VA_ARGS__) -/** Maximum length of the mbuf segment data */ -#define SFC_MBUF_SEG_LEN_MAX \ - ((1u << (8 * sizeof(((struct rte_mbuf *)0)->data_len))) - 1) - /** Maximum length of the DMA descriptor data */ #define SFC_EF10_TX_DMA_DESC_LEN_MAX \ ((1u << ESF_DZ_TX_KER_BYTE_CNT_WIDTH) - 1) -/** Maximum number of DMA descriptors per mbuf segment */ -#define SFC_EF10_TX_MBUF_SEG_DESCS_MAX \ - SFC_DIV_ROUND_UP(SFC_MBUF_SEG_LEN_MAX, \ - SFC_EF10_TX_DMA_DESC_LEN_MAX) - /** * Maximum number of descriptors/buffers in the Tx ring. * It should guarantee that corresponding event queue never overfill. 
@@ -86,6 +55,7 @@ struct sfc_ef10_txq { unsigned int ptr_mask; unsigned int added; unsigned int completed; + unsigned int max_fill_level; unsigned int free_thresh; unsigned int evq_read_ptr; struct sfc_ef10_tx_sw_desc *sw_ring; @@ -137,14 +107,10 @@ sfc_ef10_tx_get_event(struct sfc_ef10_txq *txq, efx_qword_t *tx_ev) return true; } -static void -sfc_ef10_tx_reap(struct sfc_ef10_txq *txq) +static unsigned int +sfc_ef10_tx_process_events(struct sfc_ef10_txq *txq) { - const unsigned int old_read_ptr = txq->evq_read_ptr; - const unsigned int ptr_mask = txq->ptr_mask; - unsigned int completed = txq->completed; - unsigned int pending = completed; - const unsigned int curr_done = pending - 1; + const unsigned int curr_done = txq->completed - 1; unsigned int anew_done = curr_done; efx_qword_t tx_ev; @@ -157,20 +123,49 @@ sfc_ef10_tx_reap(struct sfc_ef10_txq *txq) /* Update the latest done descriptor */ anew_done = EFX_QWORD_FIELD(tx_ev, ESF_DZ_TX_DESCR_INDX); } - pending += (anew_done - curr_done) & ptr_mask; + return (anew_done - curr_done) & txq->ptr_mask; +} + +static void +sfc_ef10_tx_reap(struct sfc_ef10_txq *txq) +{ + const unsigned int old_read_ptr = txq->evq_read_ptr; + const unsigned int ptr_mask = txq->ptr_mask; + unsigned int completed = txq->completed; + unsigned int pending = completed; + + pending += sfc_ef10_tx_process_events(txq); if (pending != completed) { + struct rte_mbuf *bulk[SFC_TX_REAP_BULK_SIZE]; + unsigned int nb = 0; + do { struct sfc_ef10_tx_sw_desc *txd; + struct rte_mbuf *m; txd = &txq->sw_ring[completed & ptr_mask]; - - if (txd->mbuf != NULL) { - rte_pktmbuf_free(txd->mbuf); - txd->mbuf = NULL; + if (txd->mbuf == NULL) + continue; + + m = rte_pktmbuf_prefree_seg(txd->mbuf); + txd->mbuf = NULL; + if (m == NULL) + continue; + + if ((nb == RTE_DIM(bulk)) || + ((nb != 0) && (m->pool != bulk[0]->pool))) { + rte_mempool_put_bulk(bulk[0]->pool, + (void *)bulk, nb); + nb = 0; } + + bulk[nb++] = m; } while (++completed != pending); + if (nb != 0) + 
rte_mempool_put_bulk(bulk[0]->pool, (void *)bulk, nb); + txq->completed = completed; } @@ -179,7 +174,7 @@ sfc_ef10_tx_reap(struct sfc_ef10_txq *txq) } static void -sfc_ef10_tx_qdesc_dma_create(phys_addr_t addr, uint16_t size, bool eop, +sfc_ef10_tx_qdesc_dma_create(rte_iova_t addr, uint16_t size, bool eop, efx_qword_t *edp) { EFX_POPULATE_QWORD_4(*edp, @@ -221,11 +216,57 @@ sfc_ef10_tx_qpush(struct sfc_ef10_txq *txq, unsigned int added, *(volatile __m128i *)txq->doorbell = oword.eo_u128[0]; } +static unsigned int +sfc_ef10_tx_pkt_descs_max(const struct rte_mbuf *m) +{ + unsigned int extra_descs_per_seg; + unsigned int extra_descs_per_pkt; + + /* + * VLAN offload is not supported yet, so no extra descriptors + * are required for VLAN option descriptor. + */ + +/** Maximum length of the mbuf segment data */ +#define SFC_MBUF_SEG_LEN_MAX UINT16_MAX + RTE_BUILD_BUG_ON(sizeof(m->data_len) != 2); + + /* + * Each segment is already counted once below. So, calculate + * how many extra DMA descriptors may be required per segment in + * the worst case because of maximum DMA descriptor length limit. + * If maximum segment length is less or equal to maximum DMA + * descriptor length, no extra DMA descriptors are required. + */ + extra_descs_per_seg = + (SFC_MBUF_SEG_LEN_MAX - 1) / SFC_EF10_TX_DMA_DESC_LEN_MAX; + +/** Maximum length of the packet */ +#define SFC_MBUF_PKT_LEN_MAX UINT32_MAX + RTE_BUILD_BUG_ON(sizeof(m->pkt_len) != 4); + + /* + * One more limitation on maximum number of extra DMA descriptors + * comes from slicing entire packet because of DMA descriptor length + * limit taking into account that there is at least one segment + * which is already counted below (so division of the maximum + * packet length minus one with round down). + * TSO is not supported yet, so packet length is limited by + * maximum PDU size. 
+ */ + extra_descs_per_pkt = + (RTE_MIN((unsigned int)EFX_MAC_PDU_MAX, + SFC_MBUF_PKT_LEN_MAX) - 1) / + SFC_EF10_TX_DMA_DESC_LEN_MAX; + + return m->nb_segs + RTE_MIN(m->nb_segs * extra_descs_per_seg, + extra_descs_per_pkt); +} + static uint16_t sfc_ef10_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) { struct sfc_ef10_txq * const txq = sfc_ef10_txq_by_dp_txq(tx_queue); - unsigned int ptr_mask; unsigned int added; unsigned int dma_desc_space; bool reap_done; @@ -236,16 +277,13 @@ sfc_ef10_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) (SFC_EF10_TXQ_NOT_RUNNING | SFC_EF10_TXQ_EXCEPTION))) return 0; - ptr_mask = txq->ptr_mask; added = txq->added; - dma_desc_space = SFC_EF10_TXQ_LIMIT(ptr_mask + 1) - - (added - txq->completed); + dma_desc_space = txq->max_fill_level - (added - txq->completed); reap_done = (dma_desc_space < txq->free_thresh); if (reap_done) { sfc_ef10_tx_reap(txq); - dma_desc_space = SFC_EF10_TXQ_LIMIT(ptr_mask + 1) - - (added - txq->completed); + dma_desc_space = txq->max_fill_level - (added - txq->completed); } for (pktp = &tx_pkts[0], pktp_end = &tx_pkts[nb_pkts]; @@ -258,8 +296,7 @@ sfc_ef10_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) if (likely(pktp + 1 != pktp_end)) rte_mbuf_prefetch_part1(pktp[1]); - if (m_seg->nb_segs * SFC_EF10_TX_MBUF_SEG_DESCS_MAX > - dma_desc_space) { + if (sfc_ef10_tx_pkt_descs_max(m_seg) > dma_desc_space) { if (reap_done) break; @@ -271,17 +308,17 @@ sfc_ef10_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) sfc_ef10_tx_reap(txq); reap_done = true; - dma_desc_space = SFC_EF10_TXQ_LIMIT(ptr_mask + 1) - + dma_desc_space = txq->max_fill_level - (added - txq->completed); - if (m_seg->nb_segs * SFC_EF10_TX_MBUF_SEG_DESCS_MAX > - dma_desc_space) + if (sfc_ef10_tx_pkt_descs_max(m_seg) > dma_desc_space) break; } pkt_len = m_seg->pkt_len; do { - phys_addr_t seg_addr = rte_mbuf_data_dma_addr(m_seg); + rte_iova_t seg_addr = 
rte_mbuf_data_iova(m_seg); unsigned int seg_len = rte_pktmbuf_data_len(m_seg); + unsigned int id = added & txq->ptr_mask; SFC_ASSERT(seg_len <= SFC_EF10_TX_DMA_DESC_LEN_MAX); @@ -289,15 +326,30 @@ sfc_ef10_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) sfc_ef10_tx_qdesc_dma_create(seg_addr, seg_len, (pkt_len == 0), - &txq->txq_hw_ring[added & ptr_mask]); + &txq->txq_hw_ring[id]); + + /* + * rte_pktmbuf_free() is commonly used in DPDK for + * recycling packets - the function checks every + * segment's reference counter and returns the + * buffer to its pool whenever possible; + * nevertheless, freeing mbuf segments one by one + * may entail some performance decline; + * from this point, sfc_ef10_tx_reap() does the same job + * on its own and frees buffers in bulks (all mbufs + * within a bulk belong to the same pool); + * from this perspective, individual segment pointers + * must be associated with the corresponding SW + * descriptors independently so that only one loop + * is sufficient on reap to inspect all the buffers + */ + txq->sw_ring[id].mbuf = m_seg; + ++added; } while ((m_seg = m_seg->next) != 0); dma_desc_space -= (added - pkt_start); - - /* Assign mbuf to the last used desc */ - txq->sw_ring[(added - 1) & ptr_mask].mbuf = *pktp; } if (likely(added != txq->added)) { @@ -313,6 +365,44 @@ sfc_ef10_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) return pktp - &tx_pkts[0]; } +static void +sfc_ef10_simple_tx_reap(struct sfc_ef10_txq *txq) +{ + const unsigned int old_read_ptr = txq->evq_read_ptr; + const unsigned int ptr_mask = txq->ptr_mask; + unsigned int completed = txq->completed; + unsigned int pending = completed; + + pending += sfc_ef10_tx_process_events(txq); + + if (pending != completed) { + struct rte_mbuf *bulk[SFC_TX_REAP_BULK_SIZE]; + unsigned int nb = 0; + + do { + struct sfc_ef10_tx_sw_desc *txd; + + txd = &txq->sw_ring[completed & ptr_mask]; + + if (nb == RTE_DIM(bulk)) { + 
rte_mempool_put_bulk(bulk[0]->pool, + (void *)bulk, nb); + nb = 0; + } + + bulk[nb++] = txd->mbuf; + } while (++completed != pending); + + rte_mempool_put_bulk(bulk[0]->pool, (void *)bulk, nb); + + txq->completed = completed; + } + + sfc_ef10_ev_qclear(txq->evq_hw_ring, ptr_mask, old_read_ptr, + txq->evq_read_ptr); +} + + static uint16_t sfc_ef10_simple_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) @@ -331,14 +421,12 @@ sfc_ef10_simple_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, ptr_mask = txq->ptr_mask; added = txq->added; - dma_desc_space = SFC_EF10_TXQ_LIMIT(ptr_mask + 1) - - (added - txq->completed); + dma_desc_space = txq->max_fill_level - (added - txq->completed); reap_done = (dma_desc_space < RTE_MAX(txq->free_thresh, nb_pkts)); if (reap_done) { - sfc_ef10_tx_reap(txq); - dma_desc_space = SFC_EF10_TXQ_LIMIT(ptr_mask + 1) - - (added - txq->completed); + sfc_ef10_simple_tx_reap(txq); + dma_desc_space = txq->max_fill_level - (added - txq->completed); } pktp_end = &tx_pkts[MIN(nb_pkts, dma_desc_space)]; @@ -349,7 +437,7 @@ sfc_ef10_simple_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, SFC_ASSERT(rte_pktmbuf_data_len(pkt) <= SFC_EF10_TX_DMA_DESC_LEN_MAX); - sfc_ef10_tx_qdesc_dma_create(rte_mbuf_data_dma_addr(pkt), + sfc_ef10_tx_qdesc_dma_create(rte_mbuf_data_iova(pkt), rte_pktmbuf_data_len(pkt), true, &txq->txq_hw_ring[id]); @@ -365,12 +453,46 @@ sfc_ef10_simple_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, #if SFC_TX_XMIT_PKTS_REAP_AT_LEAST_ONCE if (!reap_done) - sfc_ef10_tx_reap(txq); + sfc_ef10_simple_tx_reap(txq); #endif return pktp - &tx_pkts[0]; } +static sfc_dp_tx_get_dev_info_t sfc_ef10_get_dev_info; +static void +sfc_ef10_get_dev_info(struct rte_eth_dev_info *dev_info) +{ + /* + * Number of descriptors just defines maximum number of pushed + * descriptors (fill level). 
+ */ + dev_info->tx_desc_lim.nb_min = 1; + dev_info->tx_desc_lim.nb_align = 1; +} + +static sfc_dp_tx_qsize_up_rings_t sfc_ef10_tx_qsize_up_rings; +static int +sfc_ef10_tx_qsize_up_rings(uint16_t nb_tx_desc, + unsigned int *txq_entries, + unsigned int *evq_entries, + unsigned int *txq_max_fill_level) +{ + /* + * rte_ethdev API guarantees that the number meets min, max and + * alignment requirements. + */ + if (nb_tx_desc <= EFX_TXQ_MINNDESCS) + *txq_entries = EFX_TXQ_MINNDESCS; + else + *txq_entries = rte_align32pow2(nb_tx_desc); + + *evq_entries = *txq_entries; + + *txq_max_fill_level = RTE_MIN(nb_tx_desc, + SFC_EF10_TXQ_LIMIT(*evq_entries)); + return 0; +} static sfc_dp_tx_qcreate_t sfc_ef10_tx_qcreate; static int @@ -404,6 +526,7 @@ sfc_ef10_tx_qcreate(uint16_t port_id, uint16_t queue_id, txq->flags = SFC_EF10_TXQ_NOT_RUNNING; txq->ptr_mask = info->txq_entries - 1; + txq->max_fill_level = info->max_fill_level; txq->free_thresh = info->free_thresh; txq->txq_hw_ring = info->txq_hw_ring; txq->doorbell = (volatile uint8_t *)info->mem_bar + @@ -480,31 +603,48 @@ static void sfc_ef10_tx_qreap(struct sfc_dp_txq *dp_txq) { struct sfc_ef10_txq *txq = sfc_ef10_txq_by_dp_txq(dp_txq); - unsigned int txds; + unsigned int completed; - for (txds = 0; txds <= txq->ptr_mask; ++txds) { - if (txq->sw_ring[txds].mbuf != NULL) { - rte_pktmbuf_free(txq->sw_ring[txds].mbuf); - txq->sw_ring[txds].mbuf = NULL; + for (completed = txq->completed; completed != txq->added; ++completed) { + struct sfc_ef10_tx_sw_desc *txd; + + txd = &txq->sw_ring[completed & txq->ptr_mask]; + if (txd->mbuf != NULL) { + rte_pktmbuf_free_seg(txd->mbuf); + txd->mbuf = NULL; } } txq->flags &= ~SFC_EF10_TXQ_STARTED; } +static sfc_dp_tx_qdesc_status_t sfc_ef10_tx_qdesc_status; +static int +sfc_ef10_tx_qdesc_status(__rte_unused struct sfc_dp_txq *dp_txq, + __rte_unused uint16_t offset) +{ + return -ENOTSUP; +} + struct sfc_dp_tx sfc_ef10_tx = { .dp = { .name = SFC_KVARG_DATAPATH_EF10, .type = SFC_DP_TX, .hw_fw_caps 
= SFC_DP_HW_FW_CAP_EF10, }, - .features = SFC_DP_TX_FEAT_MULTI_SEG, + .features = SFC_DP_TX_FEAT_MULTI_SEG | + SFC_DP_TX_FEAT_MULTI_POOL | + SFC_DP_TX_FEAT_REFCNT | + SFC_DP_TX_FEAT_MULTI_PROCESS, + .get_dev_info = sfc_ef10_get_dev_info, + .qsize_up_rings = sfc_ef10_tx_qsize_up_rings, .qcreate = sfc_ef10_tx_qcreate, .qdestroy = sfc_ef10_tx_qdestroy, .qstart = sfc_ef10_tx_qstart, .qtx_ev = sfc_ef10_tx_qtx_ev, .qstop = sfc_ef10_tx_qstop, .qreap = sfc_ef10_tx_qreap, + .qdesc_status = sfc_ef10_tx_qdesc_status, .pkt_burst = sfc_ef10_xmit_pkts, }; @@ -513,12 +653,15 @@ struct sfc_dp_tx sfc_ef10_simple_tx = { .name = SFC_KVARG_DATAPATH_EF10_SIMPLE, .type = SFC_DP_TX, }, - .features = 0, + .features = SFC_DP_TX_FEAT_MULTI_PROCESS, + .get_dev_info = sfc_ef10_get_dev_info, + .qsize_up_rings = sfc_ef10_tx_qsize_up_rings, .qcreate = sfc_ef10_tx_qcreate, .qdestroy = sfc_ef10_tx_qdestroy, .qstart = sfc_ef10_tx_qstart, .qtx_ev = sfc_ef10_tx_qtx_ev, .qstop = sfc_ef10_tx_qstop, .qreap = sfc_ef10_tx_qreap, + .qdesc_status = sfc_ef10_tx_qdesc_status, .pkt_burst = sfc_ef10_simple_xmit_pkts, };