The freeing of mbuf's in ixgbe is one of the observable hot spots
under load. Optimize it by doing bulk free of mbufs using code similar
to i40e and fm10k.
Drop the no longer needed micro-optimization for the no refcount flag.
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
{
struct ixgbe_tx_entry *txep;
uint32_t status;
{
struct ixgbe_tx_entry *txep;
uint32_t status;
+ int i, nb_free = 0;
+ struct rte_mbuf *m, *free[RTE_IXGBE_TX_MAX_FREE_BUF_SZ];
/* check DD bit on threshold descriptor */
status = txq->tx_ring[txq->tx_next_dd].wb.status;
/* check DD bit on threshold descriptor */
status = txq->tx_ring[txq->tx_next_dd].wb.status;
*/
txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);
*/
txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);
- /* free buffers one at a time */
- if ((txq->txq_flags & (uint32_t)ETH_TXQ_FLAGS_NOREFCOUNT) != 0) {
- for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
- txep->mbuf->next = NULL;
- rte_mempool_put(txep->mbuf->pool, txep->mbuf);
- txep->mbuf = NULL;
- }
- } else {
- for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
- rte_pktmbuf_free_seg(txep->mbuf);
- txep->mbuf = NULL;
+ for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
+ /* free buffers one at a time */
+ m = __rte_pktmbuf_prefree_seg(txep->mbuf);
+ txep->mbuf = NULL;
+
+ if (unlikely(m == NULL))
+ continue;
+
+ if (nb_free >= RTE_IXGBE_TX_MAX_FREE_BUF_SZ ||
+ (nb_free > 0 && m->pool != free[0]->pool)) {
+ rte_mempool_put_bulk(free[0]->pool,
+ (void **)free, nb_free);
+ nb_free = 0;
+ if (nb_free > 0)
+ rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
+
/* buffers were freed, update counters */
txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
/* buffers were freed, update counters */
txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
#define RTE_PMD_IXGBE_TX_MAX_BURST 32
#define RTE_PMD_IXGBE_RX_MAX_BURST 32
#define RTE_PMD_IXGBE_TX_MAX_BURST 32
#define RTE_PMD_IXGBE_RX_MAX_BURST 32
+#define RTE_IXGBE_TX_MAX_FREE_BUF_SZ 64
#define RTE_IXGBE_DESCS_PER_LOOP 4
#ifdef RTE_IXGBE_INC_VECTOR
#define RTE_IXGBE_RXQ_REARM_THRESH 32
#define RTE_IXGBE_MAX_RX_BURST RTE_IXGBE_RXQ_REARM_THRESH
#define RTE_IXGBE_DESCS_PER_LOOP 4
#ifdef RTE_IXGBE_INC_VECTOR
#define RTE_IXGBE_RXQ_REARM_THRESH 32
#define RTE_IXGBE_MAX_RX_BURST RTE_IXGBE_RXQ_REARM_THRESH
-#define RTE_IXGBE_TX_MAX_FREE_BUF_SZ 64
#endif
#define RX_RING_SZ ((IXGBE_MAX_RING_DESC + RTE_IXGBE_DESCS_PER_LOOP - 1) * \
#endif
#define RX_RING_SZ ((IXGBE_MAX_RING_DESC + RTE_IXGBE_DESCS_PER_LOOP - 1) * \