X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fnet%2Fsfc%2Fsfc_tx.c;h=a2406109d453702545078cf04dfb01ff32f14a8a;hb=c100fd464bb7;hp=9c4d6b730d86530e33bb1fde46d49aa392c7e1e8;hpb=dbf0f6278f6747d93475067b2860a6f691c319fe;p=dpdk.git diff --git a/drivers/net/sfc/sfc_tx.c b/drivers/net/sfc/sfc_tx.c index 9c4d6b730d..a2406109d4 100644 --- a/drivers/net/sfc/sfc_tx.c +++ b/drivers/net/sfc/sfc_tx.c @@ -28,9 +28,219 @@ */ #include "sfc.h" +#include "sfc_debug.h" #include "sfc_log.h" #include "sfc_ev.h" #include "sfc_tx.h" +#include "sfc_tweak.h" + +/* + * Maximum number of TX queue flush attempts in case of + * failure or flush timeout + */ +#define SFC_TX_QFLUSH_ATTEMPTS (3) + +/* + * Time to wait between event queue polling attempts when waiting for TX + * queue flush done or flush failed events + */ +#define SFC_TX_QFLUSH_POLL_WAIT_MS (1) + +/* + * Maximum number of event queue polling attempts when waiting for TX queue + * flush done or flush failed events; it defines TX queue flush attempt timeout + * together with SFC_TX_QFLUSH_POLL_WAIT_MS + */ +#define SFC_TX_QFLUSH_POLL_ATTEMPTS (2000) + +static int +sfc_tx_qcheck_conf(struct sfc_adapter *sa, + const struct rte_eth_txconf *tx_conf) +{ + unsigned int flags = tx_conf->txq_flags; + int rc = 0; + + if (tx_conf->tx_rs_thresh != 0) { + sfc_err(sa, "RS bit in transmit descriptor is not supported"); + rc = EINVAL; + } + + if (tx_conf->tx_free_thresh != 0) { + sfc_err(sa, + "setting explicit TX free threshold is not supported"); + rc = EINVAL; + } + + if (tx_conf->tx_deferred_start != 0) { + sfc_err(sa, "TX queue deferred start is not supported (yet)"); + rc = EINVAL; + } + + if (tx_conf->tx_thresh.pthresh != 0 || + tx_conf->tx_thresh.hthresh != 0 || + tx_conf->tx_thresh.wthresh != 0) { + sfc_err(sa, + "prefetch/host/writeback thresholds are not supported"); + rc = EINVAL; + } + + if ((flags & ETH_TXQ_FLAGS_NOVLANOFFL) == 0) { + sfc_err(sa, "VLAN offload is not supported"); + rc = EINVAL; + } + + if ((flags & ETH_TXQ_FLAGS_NOXSUMSCTP) == 0) { + sfc_err(sa, "SCTP offload is not supported"); + rc = EINVAL; + } + + /* We either perform both TCP and UDP offload, or no offload at all */ + if (((flags & ETH_TXQ_FLAGS_NOXSUMTCP) == 0) != + ((flags & ETH_TXQ_FLAGS_NOXSUMUDP) == 0)) { + sfc_err(sa, "TCP and UDP offloads can't be set independently"); + rc = EINVAL; + } + + return rc; +} + +void +sfc_tx_qflush_done(struct sfc_txq *txq) +{ + txq->state |= SFC_TXQ_FLUSHED; + txq->state &= ~SFC_TXQ_FLUSHING; +} + +static void +sfc_tx_reap(struct sfc_txq *txq) +{ + unsigned int completed; + + + sfc_ev_qpoll(txq->evq); + + for (completed = txq->completed; + completed != txq->pending; completed++) { + struct sfc_tx_sw_desc *txd; + + txd = &txq->sw_ring[completed & txq->ptr_mask]; + + if (txd->mbuf != NULL) { + rte_pktmbuf_free(txd->mbuf); + txd->mbuf = NULL; + } + } + + txq->completed = completed; +} + +int +sfc_tx_qinit(struct sfc_adapter *sa, unsigned int sw_index, + uint16_t nb_tx_desc, unsigned int socket_id, + const struct rte_eth_txconf *tx_conf) +{ + struct sfc_txq_info *txq_info; + struct sfc_evq *evq; + struct sfc_txq *txq; + unsigned int evq_index = sfc_evq_index_by_txq_sw_index(sa, sw_index); + int rc = 0; + + sfc_log_init(sa, "TxQ = %u", sw_index); + + rc = sfc_tx_qcheck_conf(sa, tx_conf); + if (rc != 0) + goto fail_bad_conf; + + SFC_ASSERT(sw_index < sa->txq_count); + txq_info = &sa->txq_info[sw_index]; + + SFC_ASSERT(nb_tx_desc <= sa->txq_max_entries); + txq_info->entries = nb_tx_desc; + + rc = sfc_ev_qinit(sa, evq_index, txq_info->entries, socket_id); + if (rc != 0) + goto fail_ev_qinit; + + evq = sa->evq_info[evq_index].evq; + + rc = ENOMEM; + txq = rte_zmalloc_socket("sfc-txq", sizeof(*txq), 0, socket_id); + if (txq == NULL) + goto fail_txq_alloc; + + rc = sfc_dma_alloc(sa, "txq", sw_index, EFX_TXQ_SIZE(txq_info->entries), + socket_id, &txq->mem); + if (rc != 0) + goto fail_dma_alloc; + + rc = ENOMEM; + txq->pend_desc = rte_calloc_socket("sfc-txq-pend-desc", + EFX_TXQ_LIMIT(txq_info->entries), + sizeof(efx_desc_t), 0, socket_id); + if (txq->pend_desc == NULL) + goto fail_pend_desc_alloc; + + rc = ENOMEM; + txq->sw_ring = rte_calloc_socket("sfc-txq-desc", txq_info->entries, + sizeof(*txq->sw_ring), 0, socket_id); + if (txq->sw_ring == NULL) + goto fail_desc_alloc; + + txq->state = SFC_TXQ_INITIALIZED; + txq->ptr_mask = txq_info->entries - 1; + txq->hw_index = sw_index; + txq->flags = tx_conf->txq_flags; + txq->evq = evq; + + evq->txq = txq; + + txq_info->txq = txq; + + return 0; + +fail_desc_alloc: + rte_free(txq->pend_desc); + +fail_pend_desc_alloc: + sfc_dma_free(sa, &txq->mem); + +fail_dma_alloc: + rte_free(txq); + +fail_txq_alloc: + sfc_ev_qfini(sa, evq_index); + +fail_ev_qinit: + txq_info->entries = 0; + +fail_bad_conf: + sfc_log_init(sa, "failed (TxQ = %u, rc = %d)", sw_index, rc); + return rc; +} + +void +sfc_tx_qfini(struct sfc_adapter *sa, unsigned int sw_index) +{ + struct sfc_txq_info *txq_info; + struct sfc_txq *txq; + + sfc_log_init(sa, "TxQ = %u", sw_index); + + SFC_ASSERT(sw_index < sa->txq_count); + txq_info = &sa->txq_info[sw_index]; + + txq = txq_info->txq; + SFC_ASSERT(txq != NULL); + SFC_ASSERT(txq->state == SFC_TXQ_INITIALIZED); + + txq_info->txq = NULL; + txq_info->entries = 0; + + rte_free(txq->sw_ring); + rte_free(txq->pend_desc); + sfc_dma_free(sa, &txq->mem); + rte_free(txq); +} static int sfc_tx_qinit_info(struct sfc_adapter *sa, unsigned int sw_index) @@ -118,7 +328,316 @@ fail_check_mode: void sfc_tx_fini(struct sfc_adapter *sa) { + int sw_index; + + sw_index = sa->txq_count; + while (--sw_index >= 0) { + if (sa->txq_info[sw_index].txq != NULL) + sfc_tx_qfini(sa, sw_index); + } + rte_free(sa->txq_info); sa->txq_info = NULL; sa->txq_count = 0; } + +int +sfc_tx_qstart(struct sfc_adapter *sa, unsigned int sw_index) +{ + struct rte_eth_dev_data *dev_data; + struct sfc_txq_info *txq_info; + struct sfc_txq *txq; + struct sfc_evq *evq; + uint16_t flags; + unsigned int desc_index; + int rc = 0; + + sfc_log_init(sa, "TxQ = %u", sw_index); + + SFC_ASSERT(sw_index < sa->txq_count); + txq_info = &sa->txq_info[sw_index]; + + txq = txq_info->txq; + + SFC_ASSERT(txq->state == SFC_TXQ_INITIALIZED); + + evq = txq->evq; + + rc = sfc_ev_qstart(sa, evq->evq_index); + if (rc != 0) + goto fail_ev_qstart; + + /* + * It seems that DPDK has no controls regarding IPv4 offloads, + * hence, we always enable it here + */ + if ((txq->flags & ETH_TXQ_FLAGS_NOXSUMTCP) || + (txq->flags & ETH_TXQ_FLAGS_NOXSUMUDP)) + flags = EFX_TXQ_CKSUM_IPV4; + else + flags = EFX_TXQ_CKSUM_IPV4 | EFX_TXQ_CKSUM_TCPUDP; + + rc = efx_tx_qcreate(sa->nic, sw_index, 0, &txq->mem, + txq_info->entries, 0 /* not used on EF10 */, + flags, evq->common, + &txq->common, &desc_index); + if (rc != 0) + goto fail_tx_qcreate; + + txq->added = txq->pending = txq->completed = desc_index; + + efx_tx_qenable(txq->common); + + txq->state |= (SFC_TXQ_STARTED | SFC_TXQ_RUNNING); + + /* + * It seems to be used by DPDK for debug purposes only ('rte_ether') + */ + dev_data = sa->eth_dev->data; + dev_data->tx_queue_state[sw_index] = RTE_ETH_QUEUE_STATE_STARTED; + + return 0; + +fail_tx_qcreate: + sfc_ev_qstop(sa, evq->evq_index); + +fail_ev_qstart: + return rc; +} + +void +sfc_tx_qstop(struct sfc_adapter *sa, unsigned int sw_index) +{ + struct rte_eth_dev_data *dev_data; + struct sfc_txq_info *txq_info; + struct sfc_txq *txq; + unsigned int retry_count; + unsigned int wait_count; + unsigned int txds; + + sfc_log_init(sa, "TxQ = %u", sw_index); + + SFC_ASSERT(sw_index < sa->txq_count); + txq_info = &sa->txq_info[sw_index]; + + txq = txq_info->txq; + + SFC_ASSERT(txq->state & SFC_TXQ_STARTED); + + txq->state &= ~SFC_TXQ_RUNNING; + + /* + * Retry TX queue flushing in case of flush failed or + * timeout; in the worst case it can delay for 6 seconds + */ + for (retry_count = 0; + ((txq->state & SFC_TXQ_FLUSHED) == 0) && + (retry_count < SFC_TX_QFLUSH_ATTEMPTS); + ++retry_count) { + if (efx_tx_qflush(txq->common) != 0) { + txq->state |= SFC_TXQ_FLUSHING; + break; + } + + /* + * Wait for TX queue flush done or flush failed event at least + * SFC_TX_QFLUSH_POLL_WAIT_MS milliseconds and not more + * than 2 seconds (SFC_TX_QFLUSH_POLL_WAIT_MS multiplied + * by SFC_TX_QFLUSH_POLL_ATTEMPTS) + */ + wait_count = 0; + do { + rte_delay_ms(SFC_TX_QFLUSH_POLL_WAIT_MS); + sfc_ev_qpoll(txq->evq); + } while ((txq->state & SFC_TXQ_FLUSHING) && + wait_count++ < SFC_TX_QFLUSH_POLL_ATTEMPTS); + + if (txq->state & SFC_TXQ_FLUSHING) + sfc_err(sa, "TxQ %u flush timed out", sw_index); + + if (txq->state & SFC_TXQ_FLUSHED) + sfc_info(sa, "TxQ %u flushed", sw_index); + } + + sfc_tx_reap(txq); + + for (txds = 0; txds < txq_info->entries; txds++) { + if (txq->sw_ring[txds].mbuf != NULL) { + rte_pktmbuf_free(txq->sw_ring[txds].mbuf); + txq->sw_ring[txds].mbuf = NULL; + } + } + + txq->state = SFC_TXQ_INITIALIZED; + + efx_tx_qdestroy(txq->common); + + sfc_ev_qstop(sa, txq->evq->evq_index); + + /* + * It seems to be used by DPDK for debug purposes only ('rte_ether') + */ + dev_data = sa->eth_dev->data; + dev_data->tx_queue_state[sw_index] = RTE_ETH_QUEUE_STATE_STOPPED; +} + +int +sfc_tx_start(struct sfc_adapter *sa) +{ + unsigned int sw_index; + int rc = 0; + + sfc_log_init(sa, "txq_count = %u", sa->txq_count); + + rc = efx_tx_init(sa->nic); + if (rc != 0) + goto fail_efx_tx_init; + + for (sw_index = 0; sw_index < sa->txq_count; ++sw_index) { + rc = sfc_tx_qstart(sa, sw_index); + if (rc != 0) + goto fail_tx_qstart; + } + + return 0; + +fail_tx_qstart: + while (sw_index-- > 0) + sfc_tx_qstop(sa, sw_index); + + efx_tx_fini(sa->nic); + +fail_efx_tx_init: + sfc_log_init(sa, "failed (rc = %d)", rc); + return rc; +} + +void +sfc_tx_stop(struct sfc_adapter *sa) +{ + unsigned int sw_index; + + sfc_log_init(sa, "txq_count = %u", sa->txq_count); + + sw_index = sa->txq_count; + while (sw_index-- > 0) { + if (sa->txq_info[sw_index].txq != NULL) + sfc_tx_qstop(sa, sw_index); + } + + efx_tx_fini(sa->nic); +} + +uint16_t +sfc_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) +{ + struct sfc_txq *txq = (struct sfc_txq *)tx_queue; + unsigned int added = txq->added; + unsigned int pushed = added; + unsigned int pkts_sent = 0; + efx_desc_t *pend = &txq->pend_desc[0]; + const unsigned int hard_max_fill = EFX_TXQ_LIMIT(txq->ptr_mask + 1); + const unsigned int soft_max_fill = hard_max_fill - + SFC_TX_MAX_PKT_DESC; + unsigned int fill_level = added - txq->completed; + boolean_t reap_done; + int rc __rte_unused; + struct rte_mbuf **pktp; + + if (unlikely((txq->state & SFC_TXQ_RUNNING) == 0)) + goto done; + + /* + * If insufficient space for a single packet is present, + * we should reap; otherwise, we shouldn't do that all the time + * to avoid latency increase + */ + reap_done = (fill_level > soft_max_fill); + + if (reap_done) { + sfc_tx_reap(txq); + /* + * Recalculate fill level since 'txq->completed' + * might have changed on reap + */ + fill_level = added - txq->completed; + } + + for (pkts_sent = 0, pktp = &tx_pkts[0]; + (pkts_sent < nb_pkts) && (fill_level <= soft_max_fill); + pkts_sent++, pktp++) { + struct rte_mbuf *m_seg = *pktp; + size_t pkt_len = m_seg->pkt_len; + unsigned int pkt_descs = 0; + + for (; m_seg != NULL; m_seg = m_seg->next) { + efsys_dma_addr_t next_frag; + size_t seg_len; + + seg_len = m_seg->data_len; + next_frag = rte_mbuf_data_dma_addr(m_seg); + + do { + efsys_dma_addr_t frag_addr = next_frag; + size_t frag_len; + + next_frag = RTE_ALIGN(frag_addr + 1, + SFC_TX_SEG_BOUNDARY); + frag_len = MIN(next_frag - frag_addr, seg_len); + seg_len -= frag_len; + pkt_len -= frag_len; + + efx_tx_qdesc_dma_create(txq->common, + frag_addr, frag_len, + (pkt_len == 0), + pend++); + + pkt_descs++; + } while (seg_len != 0); + } + + added += pkt_descs; + + fill_level += pkt_descs; + if (unlikely(fill_level > hard_max_fill)) { + /* + * Our estimation for maximum number of descriptors + * required to send a packet seems to be wrong. + * Try to reap (if we haven't yet). + */ + if (!reap_done) { + sfc_tx_reap(txq); + reap_done = B_TRUE; + fill_level = added - txq->completed; + if (fill_level > hard_max_fill) { + pend -= pkt_descs; + break; + } + } else { + pend -= pkt_descs; + break; + } + } + + /* Assign mbuf to the last used desc */ + txq->sw_ring[(added - 1) & txq->ptr_mask].mbuf = *pktp; + } + + if (likely(pkts_sent > 0)) { + rc = efx_tx_qdesc_post(txq->common, txq->pend_desc, + pend - &txq->pend_desc[0], + txq->completed, &txq->added); + SFC_ASSERT(rc == 0); + + if (likely(pushed != txq->added)) + efx_tx_qpush(txq->common, txq->added, pushed); + } + +#if SFC_TX_XMIT_PKTS_REAP_AT_LEAST_ONCE + if (!reap_done) + sfc_tx_reap(txq); +#endif + +done: + return pkts_sent; +}