rte_pktmbuf_free_bulk calls rte_mempool_put_bulk with the number of
pending packets to return to the mempool. In contrast, rte_pktmbuf_free
calls rte_mempool_put, which in turn calls rte_mempool_put_bulk with a
single object. An important performance-related downside of returning
one packet at a time to the mempool is that on each call the per-core
cache pointer needs to be read from TLS, whereas a single
rte_mempool_put_bulk reads it from TLS only once.
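
As a minimal illustration (the helper names and the pkts/n parameters
below are hypothetical, not part of this patch), the two approaches
compare as follows; only the bulk variant resolves the per-core cache
once for the whole batch:

    #include <rte_mbuf.h>

    /* One mempool put per packet: the per-core cache pointer is
     * fetched from TLS on every iteration.
     */
    static void
    free_one_by_one(struct rte_mbuf **pkts, unsigned int n)
    {
            unsigned int i;

            for (i = 0; i < n; i++)
                    rte_pktmbuf_free(pkts[i]);
    }

    /* One call for the whole batch: the per-core cache is looked
     * up from TLS only once.
     */
    static void
    free_in_bulk(struct rte_mbuf **pkts, unsigned int n)
    {
            rte_pktmbuf_free_bulk(pkts, n);
    }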
Signed-off-by: Balazs Nemeth <bnemeth@redhat.com>
Reviewed-by: Igor Russkikh <irusskikh@marvell.com>
 	struct rte_mbuf *mbuf;
 	uint16_t nb_segs;
 	uint16_t idx;
+	uint16_t first_idx;
 
 	rte_compiler_barrier();
 	sw_tx_cons = ecore_chain_get_cons_idx(&txq->tx_pbl);
 	idx = txq->sw_tx_cons;
 	remaining = hw_bd_cons - sw_tx_cons;
 	txq->nb_tx_avail += remaining;
+	first_idx = idx;
 
 	while (remaining) {
 		mbuf = txq->sw_tx_ring[idx];
 		nb_segs = mbuf->nb_segs;
 		remaining -= nb_segs;
 		while (nb_segs) {
 			ecore_chain_consume(&txq->tx_pbl);
 			nb_segs--;
 		}
-		rte_pktmbuf_free(mbuf);
 		idx = (idx + 1) & mask;
 		PMD_TX_LOG(DEBUG, txq, "Freed tx packet\n");
 	}
 	txq->sw_tx_cons = idx;
+
+	if (first_idx > idx) {
+		rte_pktmbuf_free_bulk(&txq->sw_tx_ring[first_idx],
+				      mask - first_idx + 1);
+		rte_pktmbuf_free_bulk(&txq->sw_tx_ring[0], idx);
+	} else {
+		rte_pktmbuf_free_bulk(&txq->sw_tx_ring[first_idx],
+				      idx - first_idx);
+	}
 }
 
 static int qede_drain_txq(struct qede_dev *qdev,
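
For reference, a self-contained sketch of the wrap-around handling the
hunk above introduces (ring, ring_size, first and last are hypothetical
names, not taken from the driver): because rte_pktmbuf_free_bulk
expects one contiguous array, a bulk free over a circular software ring
must be split into at most two contiguous spans.

    #include <stdint.h>
    #include <rte_mbuf.h>

    /* Free all packets in the half-open index range [first, last) of
     * a circular ring holding ring_size entries.
     */
    static void
    ring_free_bulk(struct rte_mbuf **ring, uint16_t ring_size,
                   uint16_t first, uint16_t last)
    {
            if (first > last) {
                    /* Range wraps: free [first, ring_size) first... */
                    rte_pktmbuf_free_bulk(&ring[first], ring_size - first);
                    /* ...then the wrapped part [0, last). */
                    rte_pktmbuf_free_bulk(&ring[0], last);
            } else {
                    /* Contiguous range: one call suffices. */
                    rte_pktmbuf_free_bulk(&ring[first], last - first);
            }
    }

Assuming mask is the ring size minus one (consistent with
idx = (idx + 1) & mask in the driver), the patch's count of
mask - first_idx + 1 equals ring_size - first in this sketch.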