net/sfc: send bursts of packets
author     Ivan Malov <ivan.malov@oktetlabs.ru>
           Tue, 29 Nov 2016 16:19:27 +0000 (16:19 +0000)
committer  Ferruh Yigit <ferruh.yigit@intel.com>
           Tue, 17 Jan 2017 18:39:27 +0000 (19:39 +0100)
Signed-off-by: Ivan Malov <ivan.malov@oktetlabs.ru>
Signed-off-by: Andrew Rybchenko <arybchenko@solarflare.com>
Reviewed-by: Andy Moreton <amoreton@solarflare.com>
Reviewed-by: Ferruh Yigit <ferruh.yigit@intel.com>
drivers/net/sfc/sfc_ethdev.c
drivers/net/sfc/sfc_ev.c
drivers/net/sfc/sfc_tweak.h
drivers/net/sfc/sfc_tx.c
drivers/net/sfc/sfc_tx.h

drivers/net/sfc/sfc_ethdev.c
index 4a4c4dd..68717a6 100644
@@ -387,6 +387,7 @@ sfc_eth_dev_init(struct rte_eth_dev *dev)
 
        dev->dev_ops = &sfc_eth_dev_ops;
        dev->rx_pkt_burst = &sfc_recv_pkts;
+       dev->tx_pkt_burst = &sfc_xmit_pkts;
 
        sfc_adapter_unlock(sa);
 
@@ -425,6 +426,7 @@ sfc_eth_dev_uninit(struct rte_eth_dev *dev)
 
        dev->dev_ops = NULL;
        dev->rx_pkt_burst = NULL;
+       dev->tx_pkt_burst = NULL;
 
        sfc_kvargs_cleanup(sa);
 
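For context, an application never calls sfc_xmit_pkts() directly; it goes through rte_eth_tx_burst(), which dispatches to whatever the PMD installed in dev->tx_pkt_burst above. A minimal caller sketch (port 0, queue 0 and the drop-on-failure policy are illustrative, not part of this patch):

#include <rte_ethdev.h>
#include <rte_mbuf.h>

/* Send up to `n` prepared mbufs; rte_eth_tx_burst() forwards them to the
 * driver's burst callback (sfc_xmit_pkts() for this PMD). Mbufs the driver
 * did not accept remain owned by the caller.
 */
static void
xmit_example(struct rte_mbuf **pkts, uint16_t n)
{
	uint16_t sent = rte_eth_tx_burst(0 /* port */, 0 /* queue */, pkts, n);

	while (sent < n)
		rte_pktmbuf_free(pkts[sent++]); /* or retry on a later loop */
}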
drivers/net/sfc/sfc_ev.c
index 075172a..96b95cc 100644
@@ -140,12 +140,30 @@ done:
 }
 
 static boolean_t
-sfc_ev_tx(void *arg, __rte_unused uint32_t label, __rte_unused uint32_t id)
+sfc_ev_tx(void *arg, __rte_unused uint32_t label, uint32_t id)
 {
        struct sfc_evq *evq = arg;
+       struct sfc_txq *txq;
+       unsigned int stop;
+       unsigned int delta;
 
-       sfc_err(evq->sa, "EVQ %u unexpected Tx event", evq->evq_index);
-       return B_TRUE;
+       txq = evq->txq;
+
+       SFC_ASSERT(txq != NULL);
+       SFC_ASSERT(txq->evq == evq);
+
+       if (unlikely((txq->state & SFC_TXQ_STARTED) == 0))
+               goto done;
+
+       stop = (id + 1) & txq->ptr_mask;
+       id = txq->pending & txq->ptr_mask;
+
+       delta = (stop >= id) ? (stop - id) : (txq->ptr_mask + 1 - id + stop);
+
+       txq->pending += delta;
+
+done:
+       return B_FALSE;
 }
 
 static boolean_t
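The pointer arithmetic added to sfc_ev_tx() above computes how many Tx descriptors a completion event retires: the event carries the ring index of the last completed descriptor, and the distance from the first not-yet-accounted descriptor to one past that index, modulo the ring size, is added to txq->pending. A standalone sketch of the same arithmetic (function and parameter names are illustrative, not driver code):

#include <assert.h>

/* Number of descriptors retired by an event reporting ring index `id`,
 * given the running `pending` count and the ring mask (ring size minus
 * one; the size is a power of two).
 */
static unsigned int
completed_delta(unsigned int pending, unsigned int id, unsigned int ptr_mask)
{
	unsigned int stop = (id + 1) & ptr_mask;  /* one past last completed */
	unsigned int start = pending & ptr_mask;  /* first not yet accounted */

	return (stop >= start) ? (stop - start)
			       : (ptr_mask + 1 - start + stop);
}

int
main(void)
{
	/* 8-entry ring (mask 7), pending at 6, event reports id 1:
	 * descriptors 6, 7, 0 and 1 completed, so the delta is 4.
	 */
	assert(completed_delta(6, 1, 7) == 4);
	return 0;
}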
drivers/net/sfc/sfc_tweak.h
index 8538d56..8a60f35 100644
  */
 #define SFC_RX_REFILL_BULK     (RTE_CACHE_LINE_SIZE / sizeof(efx_qword_t))
 
+/**
+ * Make the transmit path reap completed descriptors at least once per burst;
+ * well-timed reaping improves cache locality because the same mbufs may be
+ * reused to send subsequent bursts
+ */
+#define SFC_TX_XMIT_PKTS_REAP_AT_LEAST_ONCE    0
+
 #endif /* _SFC_TWEAK_H_ */
drivers/net/sfc/sfc_tx.c
index 3f38066..a240610 100644
@@ -32,6 +32,7 @@
 #include "sfc_log.h"
 #include "sfc_ev.h"
 #include "sfc_tx.h"
+#include "sfc_tweak.h"
 
 /*
  * Maximum number of TX queue flush attempts in case of
@@ -526,3 +527,117 @@ sfc_tx_stop(struct sfc_adapter *sa)
 
        efx_tx_fini(sa->nic);
 }
+
+uint16_t
+sfc_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+       struct sfc_txq *txq = (struct sfc_txq *)tx_queue;
+       unsigned int added = txq->added;
+       unsigned int pushed = added;
+       unsigned int pkts_sent = 0;
+       efx_desc_t *pend = &txq->pend_desc[0];
+       const unsigned int hard_max_fill = EFX_TXQ_LIMIT(txq->ptr_mask + 1);
+       const unsigned int soft_max_fill = hard_max_fill -
+                                          SFC_TX_MAX_PKT_DESC;
+       unsigned int fill_level = added - txq->completed;
+       boolean_t reap_done;
+       int rc __rte_unused;
+       struct rte_mbuf **pktp;
+
+       if (unlikely((txq->state & SFC_TXQ_RUNNING) == 0))
+               goto done;
+
+       /*
+        * Reap only if there is not enough room left for even a single
+        * worst-case packet; reaping on every burst would needlessly
+        * add latency
+        */
+       reap_done = (fill_level > soft_max_fill);
+
+       if (reap_done) {
+               sfc_tx_reap(txq);
+               /*
+                * Recalculate fill level since 'txq->completed'
+                * might have changed on reap
+                */
+               fill_level = added - txq->completed;
+       }
+
+       for (pkts_sent = 0, pktp = &tx_pkts[0];
+            (pkts_sent < nb_pkts) && (fill_level <= soft_max_fill);
+            pkts_sent++, pktp++) {
+               struct rte_mbuf         *m_seg = *pktp;
+               size_t                  pkt_len = m_seg->pkt_len;
+               unsigned int            pkt_descs = 0;
+
+               for (; m_seg != NULL; m_seg = m_seg->next) {
+                       efsys_dma_addr_t        next_frag;
+                       size_t                  seg_len;
+
+                       seg_len = m_seg->data_len;
+                       next_frag = rte_mbuf_data_dma_addr(m_seg);
+
+                       do {
+                               efsys_dma_addr_t        frag_addr = next_frag;
+                               size_t                  frag_len;
+
+                               next_frag = RTE_ALIGN(frag_addr + 1,
+                                                     SFC_TX_SEG_BOUNDARY);
+                               frag_len = MIN(next_frag - frag_addr, seg_len);
+                               seg_len -= frag_len;
+                               pkt_len -= frag_len;
+
+                               efx_tx_qdesc_dma_create(txq->common,
+                                                       frag_addr, frag_len,
+                                                       (pkt_len == 0),
+                                                       pend++);
+
+                               pkt_descs++;
+                       } while (seg_len != 0);
+               }
+
+               added += pkt_descs;
+
+               fill_level += pkt_descs;
+               if (unlikely(fill_level > hard_max_fill)) {
+                       /*
+                        * Our estimate of the maximum number of descriptors
+                        * needed to send this packet turned out to be too low.
+                        * Try to reap (unless we have done so already).
+                        */
+                       if (!reap_done) {
+                               sfc_tx_reap(txq);
+                               reap_done = B_TRUE;
+                               fill_level = added - txq->completed;
+                               if (fill_level > hard_max_fill) {
+                                       pend -= pkt_descs;
+                                       break;
+                               }
+                       } else {
+                               pend -= pkt_descs;
+                               break;
+                       }
+               }
+
+               /* Assign mbuf to the last used desc */
+               txq->sw_ring[(added - 1) & txq->ptr_mask].mbuf = *pktp;
+       }
+
+       if (likely(pkts_sent > 0)) {
+               rc = efx_tx_qdesc_post(txq->common, txq->pend_desc,
+                                      pend - &txq->pend_desc[0],
+                                      txq->completed, &txq->added);
+               SFC_ASSERT(rc == 0);
+
+               if (likely(pushed != txq->added))
+                       efx_tx_qpush(txq->common, txq->added, pushed);
+       }
+
+#if SFC_TX_XMIT_PKTS_REAP_AT_LEAST_ONCE
+       if (!reap_done)
+               sfc_tx_reap(txq);
+#endif
+
+done:
+       return pkts_sent;
+}
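The innermost do/while loop above handles the 4K-boundary restriction documented in sfc_tx.h below: a single Tx DMA descriptor must not span a 4K boundary, so every mbuf data segment is chopped at 4K-aligned addresses before descriptors are created. A standalone sketch of just that splitting step (the emit callback and names are illustrative; EOP flagging and descriptor bookkeeping are omitted):

#include <stddef.h>
#include <stdint.h>

#define SEG_BOUNDARY 4096	/* same constraint as SFC_TX_SEG_BOUNDARY */

/* First address past `addr` that the current fragment must not reach,
 * i.e. `addr + 1` rounded up to the next 4K boundary; this mirrors
 * RTE_ALIGN(frag_addr + 1, SFC_TX_SEG_BOUNDARY) in the patch.
 */
static uint64_t
next_boundary(uint64_t addr)
{
	return (addr + SEG_BOUNDARY) & ~(uint64_t)(SEG_BOUNDARY - 1);
}

/* Split one data segment [addr, addr + len) into boundary-safe fragments
 * and pass each one to `emit`; returns the number of fragments produced.
 */
static unsigned int
split_segment(uint64_t addr, size_t len,
	      void (*emit)(uint64_t frag_addr, size_t frag_len))
{
	unsigned int nfrags = 0;

	while (len != 0) {
		uint64_t end = next_boundary(addr);
		size_t frag_len = (end - addr < len) ? (size_t)(end - addr)
						     : len;

		emit(addr, frag_len);
		addr += frag_len;
		len -= frag_len;
		nfrags++;
	}
	return nfrags;
}

Since a data segment no larger than 4K can cross at most one such boundary, it yields at most two fragments, which is consistent with the SFC_TX_MAX_PKT_DESC estimate of 4 descriptors for a typical packet.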
drivers/net/sfc/sfc_tx.h
index d74de00..fe2736b 100644
 extern "C" {
 #endif
 
+/**
+ * Estimated maximum number of Tx descriptors a transmit packet may need;
+ * the estimate assumes a packet made of a header plus a couple of data
+ * segments, one of which crosses a 4K page boundary;
+ * the transmit path uses it to avoid redundant reaping and, thus,
+ * extra latency
+ */
+#define SFC_TX_MAX_PKT_DESC    4
+
+/**
+ * A segment must not cross a 4K boundary
+ * (this is a requirement of NIC TX descriptors)
+ */
+#define SFC_TX_SEG_BOUNDARY    4096
+
 struct sfc_adapter;
 struct sfc_evq;
 
@@ -100,6 +115,9 @@ void sfc_tx_qstop(struct sfc_adapter *sa, unsigned int sw_index);
 int sfc_tx_start(struct sfc_adapter *sa);
 void sfc_tx_stop(struct sfc_adapter *sa);
 
+uint16_t sfc_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
+                      uint16_t nb_pkts);
+
 #ifdef __cplusplus
 }
 #endif
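Putting the two constants above in context: sfc_xmit_pkts() keeps a running fill level (descriptors added minus descriptors completed) and only reaps before building a burst when not even one worst-case packet of SFC_TX_MAX_PKT_DESC descriptors is guaranteed to fit under the hard queue limit (EFX_TXQ_LIMIT() of the ring size in the patch). A minimal sketch of that check, with illustrative names and the hard limit passed in as a plain number:

#include <stdbool.h>

#define MAX_PKT_DESC	4	/* mirrors SFC_TX_MAX_PKT_DESC */

/* Decide whether completed descriptors must be reaped before queuing the
 * next burst; `hard_max_fill` is the absolute per-queue descriptor limit.
 */
static bool
need_reap_before_burst(unsigned int added, unsigned int completed,
		       unsigned int hard_max_fill)
{
	unsigned int soft_max_fill = hard_max_fill - MAX_PKT_DESC;
	unsigned int fill_level = added - completed; /* descriptors in flight */

	return fill_level > soft_max_fill;
}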