+ /* next descriptor to process */
+ rxbd = ENETC_RXBD(*rx_ring, i);
+ rte_prefetch0(rxbd);
+ bd_count = rx_ring->bd_count;
+ /* LS1028A does not have a platform cache, so any software access
+ * following a hardware write goes directly to DDR. The latency of such a
+ * read is in excess of 100 core cycles, so prefetch further in advance
+ * to mitigate this.
+ * How much is worth prefetching really depends on traffic conditions.
+ * With congested Rx this could go up to 4 cache lines or so. But if
+ * software keeps up with hardware and follows behind the Rx PI by a
+ * cache line or less, then prefetching more hurts performance: we would
+ * only be prefetching BDs that have yet to be written by ENETC, and
+ * those lines will have to be evicted again anyway.
+ */
+ rte_prefetch0(ENETC_RXBD(*rx_ring,
+ (i + ENETC_CACHE_LINE_RXBDS) % bd_count));
+ rte_prefetch0(ENETC_RXBD(*rx_ring,
+ (i + ENETC_CACHE_LINE_RXBDS * 2) % bd_count));
+
+ cleaned_cnt = enetc_bd_unused(rx_ring);