X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fnet%2Fsfc%2Fsfc_ef10_tx.c;h=9047b3e46e5c07c758ba5cdcef49cc64b13e0192;hb=2fd826a301b5a872cad0b67c807d3011693321c7;hp=751452961574e5d431be4371c23bdb14d3548fd0;hpb=8b00f426eb6614d5c01fec95cdc271700f85f886;p=dpdk.git diff --git a/drivers/net/sfc/sfc_ef10_tx.c b/drivers/net/sfc/sfc_ef10_tx.c index 7514529615..9047b3e46e 100644 --- a/drivers/net/sfc/sfc_ef10_tx.c +++ b/drivers/net/sfc/sfc_ef10_tx.c @@ -47,19 +47,10 @@ #define sfc_ef10_tx_err(dpq, ...) \ SFC_DP_LOG(SFC_KVARG_DATAPATH_EF10, ERR, dpq, __VA_ARGS__) -/** Maximum length of the mbuf segment data */ -#define SFC_MBUF_SEG_LEN_MAX \ - ((1u << (8 * sizeof(((struct rte_mbuf *)0)->data_len))) - 1) - /** Maximum length of the DMA descriptor data */ #define SFC_EF10_TX_DMA_DESC_LEN_MAX \ ((1u << ESF_DZ_TX_KER_BYTE_CNT_WIDTH) - 1) -/** Maximum number of DMA descriptors per mbuf segment */ -#define SFC_EF10_TX_MBUF_SEG_DESCS_MAX \ - SFC_DIV_ROUND_UP(SFC_MBUF_SEG_LEN_MAX, \ - SFC_EF10_TX_DMA_DESC_LEN_MAX) - /** * Maximum number of descriptors/buffers in the Tx ring. * It should guarantee that corresponding event queue never overfill. @@ -137,14 +128,10 @@ sfc_ef10_tx_get_event(struct sfc_ef10_txq *txq, efx_qword_t *tx_ev) return true; } -static void -sfc_ef10_tx_reap(struct sfc_ef10_txq *txq) +static unsigned int +sfc_ef10_tx_process_events(struct sfc_ef10_txq *txq) { - const unsigned int old_read_ptr = txq->evq_read_ptr; - const unsigned int ptr_mask = txq->ptr_mask; - unsigned int completed = txq->completed; - unsigned int pending = completed; - const unsigned int curr_done = pending - 1; + const unsigned int curr_done = txq->completed - 1; unsigned int anew_done = curr_done; efx_qword_t tx_ev; @@ -157,20 +144,49 @@ sfc_ef10_tx_reap(struct sfc_ef10_txq *txq) /* Update the latest done descriptor */ anew_done = EFX_QWORD_FIELD(tx_ev, ESF_DZ_TX_DESCR_INDX); } - pending += (anew_done - curr_done) & ptr_mask; + return (anew_done - curr_done) & txq->ptr_mask; +} + +static void +sfc_ef10_tx_reap(struct sfc_ef10_txq *txq) +{ + const unsigned int old_read_ptr = txq->evq_read_ptr; + const unsigned int ptr_mask = txq->ptr_mask; + unsigned int completed = txq->completed; + unsigned int pending = completed; + + pending += sfc_ef10_tx_process_events(txq); if (pending != completed) { + struct rte_mbuf *bulk[SFC_TX_REAP_BULK_SIZE]; + unsigned int nb = 0; + do { struct sfc_ef10_tx_sw_desc *txd; + struct rte_mbuf *m; txd = &txq->sw_ring[completed & ptr_mask]; - - if (txd->mbuf != NULL) { - rte_pktmbuf_free(txd->mbuf); - txd->mbuf = NULL; + if (txd->mbuf == NULL) + continue; + + m = rte_pktmbuf_prefree_seg(txd->mbuf); + txd->mbuf = NULL; + if (m == NULL) + continue; + + if ((nb == RTE_DIM(bulk)) || + ((nb != 0) && (m->pool != bulk[0]->pool))) { + rte_mempool_put_bulk(bulk[0]->pool, + (void *)bulk, nb); + nb = 0; } + + bulk[nb++] = m; } while (++completed != pending); + if (nb != 0) + rte_mempool_put_bulk(bulk[0]->pool, (void *)bulk, nb); + txq->completed = completed; } @@ -221,6 +237,53 @@ sfc_ef10_tx_qpush(struct sfc_ef10_txq *txq, unsigned int added, *(volatile __m128i *)txq->doorbell = oword.eo_u128[0]; } +static unsigned int +sfc_ef10_tx_pkt_descs_max(const struct rte_mbuf *m) +{ + unsigned int extra_descs_per_seg; + unsigned int extra_descs_per_pkt; + + /* + * VLAN offload is not supported yet, so no extra descriptors + * are required for VLAN option descriptor. 
+ */ + +/** Maximum length of the mbuf segment data */ +#define SFC_MBUF_SEG_LEN_MAX UINT16_MAX + RTE_BUILD_BUG_ON(sizeof(m->data_len) != 2); + + /* + * Each segment is already counted once below. So, calculate + * how many extra DMA descriptors may be required per segment in + * the worst case because of maximum DMA descriptor length limit. + * If maximum segment length is less or equal to maximum DMA + * descriptor length, no extra DMA descriptors are required. + */ + extra_descs_per_seg = + (SFC_MBUF_SEG_LEN_MAX - 1) / SFC_EF10_TX_DMA_DESC_LEN_MAX; + +/** Maximum length of the packet */ +#define SFC_MBUF_PKT_LEN_MAX UINT32_MAX + RTE_BUILD_BUG_ON(sizeof(m->pkt_len) != 4); + + /* + * One more limitation on maximum number of extra DMA descriptors + * comes from slicing entire packet because of DMA descriptor length + * limit taking into account that there is at least one segment + * which is already counted below (so division of the maximum + * packet length minus one with round down). + * TSO is not supported yet, so packet length is limited by + * maximum PDU size. + */ + extra_descs_per_pkt = + (RTE_MIN((unsigned int)EFX_MAC_PDU_MAX, + SFC_MBUF_PKT_LEN_MAX) - 1) / + SFC_EF10_TX_DMA_DESC_LEN_MAX; + + return m->nb_segs + RTE_MIN(m->nb_segs * extra_descs_per_seg, + extra_descs_per_pkt); +} + static uint16_t sfc_ef10_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) { @@ -258,8 +321,7 @@ sfc_ef10_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) if (likely(pktp + 1 != pktp_end)) rte_mbuf_prefetch_part1(pktp[1]); - if (m_seg->nb_segs * SFC_EF10_TX_MBUF_SEG_DESCS_MAX > - dma_desc_space) { + if (sfc_ef10_tx_pkt_descs_max(m_seg) > dma_desc_space) { if (reap_done) break; @@ -273,8 +335,7 @@ sfc_ef10_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) reap_done = true; dma_desc_space = SFC_EF10_TXQ_LIMIT(ptr_mask + 1) - (added - txq->completed); - if (m_seg->nb_segs * SFC_EF10_TX_MBUF_SEG_DESCS_MAX > - dma_desc_space) + if (sfc_ef10_tx_pkt_descs_max(m_seg) > dma_desc_space) break; } @@ -282,6 +343,7 @@ sfc_ef10_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) do { phys_addr_t seg_addr = rte_mbuf_data_dma_addr(m_seg); unsigned int seg_len = rte_pktmbuf_data_len(m_seg); + unsigned int id = added & ptr_mask; SFC_ASSERT(seg_len <= SFC_EF10_TX_DMA_DESC_LEN_MAX); @@ -289,15 +351,30 @@ sfc_ef10_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) sfc_ef10_tx_qdesc_dma_create(seg_addr, seg_len, (pkt_len == 0), - &txq->txq_hw_ring[added & ptr_mask]); + &txq->txq_hw_ring[id]); + + /* + * rte_pktmbuf_free() is commonly used in DPDK for + * recycling packets - the function checks every + * segment's reference counter and returns the + * buffer to its pool whenever possible; + * nevertheless, freeing mbuf segments one by one + * may entail some performance decline; + * from this point, sfc_efx_tx_reap() does the same job + * on its own and frees buffers in bulks (all mbufs + * within a bulk belong to the same pool); + * from this perspective, individual segment pointers + * must be associated with the corresponding SW + * descriptors independently so that only one loop + * is sufficient on reap to inspect all the buffers + */ + txq->sw_ring[id].mbuf = m_seg; + ++added; } while ((m_seg = m_seg->next) != 0); dma_desc_space -= (added - pkt_start); - - /* Assign mbuf to the last used desc */ - txq->sw_ring[(added - 1) & ptr_mask].mbuf = *pktp; } if (likely(added != txq->added)) { @@ -313,6 +390,102 @@ 
sfc_ef10_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) return pktp - &tx_pkts[0]; } +static void +sfc_ef10_simple_tx_reap(struct sfc_ef10_txq *txq) +{ + const unsigned int old_read_ptr = txq->evq_read_ptr; + const unsigned int ptr_mask = txq->ptr_mask; + unsigned int completed = txq->completed; + unsigned int pending = completed; + + pending += sfc_ef10_tx_process_events(txq); + + if (pending != completed) { + struct rte_mbuf *bulk[SFC_TX_REAP_BULK_SIZE]; + unsigned int nb = 0; + + do { + struct sfc_ef10_tx_sw_desc *txd; + + txd = &txq->sw_ring[completed & ptr_mask]; + + if (nb == RTE_DIM(bulk)) { + rte_mempool_put_bulk(bulk[0]->pool, + (void *)bulk, nb); + nb = 0; + } + + bulk[nb++] = txd->mbuf; + } while (++completed != pending); + + rte_mempool_put_bulk(bulk[0]->pool, (void *)bulk, nb); + + txq->completed = completed; + } + + sfc_ef10_ev_qclear(txq->evq_hw_ring, ptr_mask, old_read_ptr, + txq->evq_read_ptr); +} + + +static uint16_t +sfc_ef10_simple_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + struct sfc_ef10_txq * const txq = sfc_ef10_txq_by_dp_txq(tx_queue); + unsigned int ptr_mask; + unsigned int added; + unsigned int dma_desc_space; + bool reap_done; + struct rte_mbuf **pktp; + struct rte_mbuf **pktp_end; + + if (unlikely(txq->flags & + (SFC_EF10_TXQ_NOT_RUNNING | SFC_EF10_TXQ_EXCEPTION))) + return 0; + + ptr_mask = txq->ptr_mask; + added = txq->added; + dma_desc_space = SFC_EF10_TXQ_LIMIT(ptr_mask + 1) - + (added - txq->completed); + + reap_done = (dma_desc_space < RTE_MAX(txq->free_thresh, nb_pkts)); + if (reap_done) { + sfc_ef10_simple_tx_reap(txq); + dma_desc_space = SFC_EF10_TXQ_LIMIT(ptr_mask + 1) - + (added - txq->completed); + } + + pktp_end = &tx_pkts[MIN(nb_pkts, dma_desc_space)]; + for (pktp = &tx_pkts[0]; pktp != pktp_end; ++pktp) { + struct rte_mbuf *pkt = *pktp; + unsigned int id = added & ptr_mask; + + SFC_ASSERT(rte_pktmbuf_data_len(pkt) <= + SFC_EF10_TX_DMA_DESC_LEN_MAX); + + sfc_ef10_tx_qdesc_dma_create(rte_mbuf_data_dma_addr(pkt), + rte_pktmbuf_data_len(pkt), + true, &txq->txq_hw_ring[id]); + + txq->sw_ring[id].mbuf = pkt; + + ++added; + } + + if (likely(added != txq->added)) { + sfc_ef10_tx_qpush(txq, added, txq->added); + txq->added = added; + } + +#if SFC_TX_XMIT_PKTS_REAP_AT_LEAST_ONCE + if (!reap_done) + sfc_ef10_simple_tx_reap(txq); +#endif + + return pktp - &tx_pkts[0]; +} + static sfc_dp_tx_qcreate_t sfc_ef10_tx_qcreate; static int @@ -422,30 +595,61 @@ static void sfc_ef10_tx_qreap(struct sfc_dp_txq *dp_txq) { struct sfc_ef10_txq *txq = sfc_ef10_txq_by_dp_txq(dp_txq); - unsigned int txds; + unsigned int completed; + + for (completed = txq->completed; completed != txq->added; ++completed) { + struct sfc_ef10_tx_sw_desc *txd; - for (txds = 0; txds <= txq->ptr_mask; ++txds) { - if (txq->sw_ring[txds].mbuf != NULL) { - rte_pktmbuf_free(txq->sw_ring[txds].mbuf); - txq->sw_ring[txds].mbuf = NULL; + txd = &txq->sw_ring[completed & txq->ptr_mask]; + if (txd->mbuf != NULL) { + rte_pktmbuf_free(txd->mbuf); + txd->mbuf = NULL; } } txq->flags &= ~SFC_EF10_TXQ_STARTED; } +static sfc_dp_tx_qdesc_status_t sfc_ef10_tx_qdesc_status; +static int +sfc_ef10_tx_qdesc_status(__rte_unused struct sfc_dp_txq *dp_txq, + __rte_unused uint16_t offset) +{ + return -ENOTSUP; +} + struct sfc_dp_tx sfc_ef10_tx = { .dp = { .name = SFC_KVARG_DATAPATH_EF10, .type = SFC_DP_TX, .hw_fw_caps = SFC_DP_HW_FW_CAP_EF10, }, - .features = 0, + .features = SFC_DP_TX_FEAT_MULTI_SEG | + SFC_DP_TX_FEAT_MULTI_POOL | + SFC_DP_TX_FEAT_REFCNT | + 
SFC_DP_TX_FEAT_MULTI_PROCESS, .qcreate = sfc_ef10_tx_qcreate, .qdestroy = sfc_ef10_tx_qdestroy, .qstart = sfc_ef10_tx_qstart, .qtx_ev = sfc_ef10_tx_qtx_ev, .qstop = sfc_ef10_tx_qstop, .qreap = sfc_ef10_tx_qreap, + .qdesc_status = sfc_ef10_tx_qdesc_status, .pkt_burst = sfc_ef10_xmit_pkts, }; + +struct sfc_dp_tx sfc_ef10_simple_tx = { + .dp = { + .name = SFC_KVARG_DATAPATH_EF10_SIMPLE, + .type = SFC_DP_TX, + }, + .features = SFC_DP_TX_FEAT_MULTI_PROCESS, + .qcreate = sfc_ef10_tx_qcreate, + .qdestroy = sfc_ef10_tx_qdestroy, + .qstart = sfc_ef10_tx_qstart, + .qtx_ev = sfc_ef10_tx_qtx_ev, + .qstop = sfc_ef10_tx_qstop, + .qreap = sfc_ef10_tx_qreap, + .qdesc_status = sfc_ef10_tx_qdesc_status, + .pkt_burst = sfc_ef10_simple_xmit_pkts, +};
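
Note on the reap rework above: instead of calling rte_pktmbuf_free() on every
completed segment, sfc_ef10_tx_reap() now drops each segment's reference with
rte_pktmbuf_prefree_seg() and returns freeable segments to their mempool in
bulk. The standalone sketch below restates that pattern using only public DPDK
mbuf/mempool APIs; the function name free_completed_mbufs() and the BULK_SIZE
constant are illustrative stand-ins (the driver uses SFC_TX_REAP_BULK_SIZE)
and are not part of this patch.

#include <rte_common.h>
#include <rte_mbuf.h>
#include <rte_mempool.h>

#define BULK_SIZE	32	/* illustrative stand-in for SFC_TX_REAP_BULK_SIZE */

static void
free_completed_mbufs(struct rte_mbuf **done, unsigned int n)
{
	struct rte_mbuf *bulk[BULK_SIZE];
	unsigned int nb = 0;
	unsigned int i;

	for (i = 0; i < n; ++i) {
		/*
		 * Decrement the reference count; NULL means the segment is
		 * still referenced elsewhere and must not be returned to its
		 * mempool yet.
		 */
		struct rte_mbuf *m = rte_pktmbuf_prefree_seg(done[i]);

		if (m == NULL)
			continue;

		/*
		 * Flush the batch when it is full or when the pool changes:
		 * rte_mempool_put_bulk() requires all objects to belong to
		 * the same mempool.
		 */
		if (nb == RTE_DIM(bulk) ||
		    (nb != 0 && m->pool != bulk[0]->pool)) {
			rte_mempool_put_bulk(bulk[0]->pool, (void **)bulk, nb);
			nb = 0;
		}

		bulk[nb++] = m;
	}

	if (nb != 0)
		rte_mempool_put_bulk(bulk[0]->pool, (void **)bulk, nb);
}

Grouping by pool before the bulk put is what lets the EF10 datapath advertise
SFC_DP_TX_FEAT_MULTI_POOL together with SFC_DP_TX_FEAT_REFCNT above: mbufs from
different mempools may sit in the same Tx ring, and segments whose reference
count has not yet dropped to zero are simply skipped on reap.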