From d06e6bb248d4ddaf01e4d444aac957a102eb525e Mon Sep 17 00:00:00 2001
From: "Chen Jing D(Mark)"
Date: Thu, 28 Jan 2016 17:45:59 +0800
Subject: [PATCH] fm10k: optimize mbuf freeing in non-vector Tx

When the Tx function tries to free a bunch of mbufs, it frees them
one by one. This change scans the free list and merges requests that
belong to the same mempool, then frees them with a single bulk call,
which reduces the cycles spent freeing mbufs.

Signed-off-by: Chen Jing D(Mark)
Acked-by: Shaopeng He
---
 doc/guides/rel_notes/release_16_04.rst |  4 ++
 drivers/net/fm10k/fm10k_rxtx.c         | 59 ++++++++++++++++++++++----
 2 files changed, 54 insertions(+), 9 deletions(-)

diff --git a/doc/guides/rel_notes/release_16_04.rst b/doc/guides/rel_notes/release_16_04.rst
index 67892a8d45..e188033f82 100644
--- a/doc/guides/rel_notes/release_16_04.rst
+++ b/doc/guides/rel_notes/release_16_04.rst
@@ -158,6 +158,10 @@ This section should contain new features added in this release. Sample format:
 
 * **Added fm10k Rx interrupt support.**
 
+* **Optimized fm10k Tx.**
+
+  * Freed multiple mbufs at a time to reduce the cycles spent freeing mbufs.
+
 * **Increased number of next hops for LPM IPv4 to 2^24.**
 
   The next_hop field is extended from 8 bits to 24 bits for IPv4.
diff --git a/drivers/net/fm10k/fm10k_rxtx.c b/drivers/net/fm10k/fm10k_rxtx.c
index 4eca3d6f10..9f832c14a1 100644
--- a/drivers/net/fm10k/fm10k_rxtx.c
+++ b/drivers/net/fm10k/fm10k_rxtx.c
@@ -394,6 +394,51 @@ fm10k_dev_rx_descriptor_done(void *rx_queue, uint16_t offset)
 	return ret;
 }
 
+/*
+ * Free multiple TX mbufs at a time if they are in the same pool
+ *
+ * @txep: pointer to the first software ring entry to free
+ * @num: number of descs to free
+ *
+ */
+static inline void tx_free_bulk_mbuf(struct rte_mbuf **txep, int num)
+{
+	struct rte_mbuf *m, *free[RTE_FM10K_TX_MAX_FREE_BUF_SZ];
+	int i;
+	int nb_free = 0;
+
+	if (unlikely(num == 0))
+		return;
+
+	m = __rte_pktmbuf_prefree_seg(txep[0]);
+	if (likely(m != NULL)) {
+		free[0] = m;
+		nb_free = 1;
+		for (i = 1; i < num; i++) {
+			m = __rte_pktmbuf_prefree_seg(txep[i]);
+			if (likely(m != NULL)) {
+				if (likely(m->pool == free[0]->pool))
+					free[nb_free++] = m;
+				else {
+					rte_mempool_put_bulk(free[0]->pool,
+							(void **)free, nb_free);
+					free[0] = m;
+					nb_free = 1;
+				}
+			}
+			txep[i] = NULL;
+		}
+		rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
+	} else {
+		for (i = 1; i < num; i++) {
+			m = __rte_pktmbuf_prefree_seg(txep[i]);
+			if (m != NULL)
+				rte_mempool_put(m->pool, m);
+			txep[i] = NULL;
+		}
+	}
+}
+
 static inline void tx_free_descriptors(struct fm10k_tx_queue *q)
 {
 	uint16_t next_rs, count = 0;
@@ -410,11 +455,7 @@ static inline void tx_free_descriptors(struct fm10k_tx_queue *q)
 	 * including nb_desc */
 	if (q->last_free > next_rs) {
 		count = q->nb_desc - q->last_free;
-		while (q->last_free < q->nb_desc) {
-			rte_pktmbuf_free_seg(q->sw_ring[q->last_free]);
-			q->sw_ring[q->last_free] = NULL;
-			++q->last_free;
-		}
+		tx_free_bulk_mbuf(&q->sw_ring[q->last_free], count);
 		q->last_free = 0;
 	}
 
@@ -422,10 +463,10 @@ static inline void tx_free_descriptors(struct fm10k_tx_queue *q)
 	q->nb_free += count + (next_rs + 1 - q->last_free);
 
 	/* free buffers from last_free, up to and including next_rs */
-	while (q->last_free <= next_rs) {
-		rte_pktmbuf_free_seg(q->sw_ring[q->last_free]);
-		q->sw_ring[q->last_free] = NULL;
-		++q->last_free;
+	if (q->last_free <= next_rs) {
+		count = next_rs - q->last_free + 1;
+		tx_free_bulk_mbuf(&q->sw_ring[q->last_free], count);
+		q->last_free += count;
 	}
 
 	if (q->last_free == q->nb_desc)
-- 
2.20.1
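
Illustrative note, not part of the patch: the sketch below is a minimal,
self-contained C program that mirrors the grouping idea of
tx_free_bulk_mbuf() outside of DPDK. The fake_pool/fake_buf types, the
fake_pool_put_bulk() helper, free_bulk() and MAX_FREE_BUF_SZ are
hypothetical stand-ins for rte_mempool, rte_mbuf, rte_mempool_put_bulk()
and RTE_FM10K_TX_MAX_FREE_BUF_SZ; only the "group consecutive buffers
from the same pool, then release each group with one bulk call" logic is
taken from the patch.

#include <stdio.h>

#define MAX_FREE_BUF_SZ 64	/* stand-in for RTE_FM10K_TX_MAX_FREE_BUF_SZ */

struct fake_pool { const char *name; };
struct fake_buf  { struct fake_pool *pool; };

/* one bulk release per group; the driver calls rte_mempool_put_bulk() here */
static void fake_pool_put_bulk(struct fake_pool *pool,
			       struct fake_buf **bufs, int n)
{
	(void)bufs;
	printf("put_bulk: %d buf(s) returned to pool %s\n", n, pool->name);
}

/* assumes num <= MAX_FREE_BUF_SZ, as the driver bounds its batches */
static void free_bulk(struct fake_buf **txep, int num)
{
	struct fake_buf *free_list[MAX_FREE_BUF_SZ];
	int i, nb_free = 0;

	if (num == 0)
		return;

	free_list[nb_free++] = txep[0];
	for (i = 1; i < num; i++) {
		if (txep[i]->pool == free_list[0]->pool) {
			/* same pool: extend the current group */
			free_list[nb_free++] = txep[i];
		} else {
			/* pool changed: flush the group, start a new one */
			fake_pool_put_bulk(free_list[0]->pool, free_list, nb_free);
			free_list[0] = txep[i];
			nb_free = 1;
		}
		txep[i] = NULL;	/* clear the ring slot, as the patch does */
	}
	fake_pool_put_bulk(free_list[0]->pool, free_list, nb_free);
}

int main(void)
{
	struct fake_pool pa = { "A" }, pb = { "B" };
	struct fake_buf b[5] = { { &pa }, { &pa }, { &pa }, { &pb }, { &pb } };
	struct fake_buf *ring[5] = { &b[0], &b[1], &b[2], &b[3], &b[4] };

	/* expect two bulk calls: 3 bufs to pool A, then 2 bufs to pool B */
	free_bulk(ring, 5);
	return 0;
}

Running this prints one bulk release of 3 buffers to pool A followed by one
bulk release of 2 buffers to pool B, instead of 5 per-buffer frees, which is
the cycle saving the patch targets in the common single-mempool case.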