X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;ds=sidebyside;f=drivers%2Fnet%2Fenetc%2Fenetc_rxtx.c;h=412322523d14284aa949079b79c5d83265b81c40;hb=64767daf3c9eaa4d2b2b00017137ac0566df1c55;hp=8b85c5371951267d29b9c2016d27009f3fa9ce81;hpb=17d13fe659eb6abf4f565a7bfcc4387ba9964bce;p=dpdk.git diff --git a/drivers/net/enetc/enetc_rxtx.c b/drivers/net/enetc/enetc_rxtx.c index 8b85c53719..412322523d 100644 --- a/drivers/net/enetc/enetc_rxtx.c +++ b/drivers/net/enetc/enetc_rxtx.c @@ -14,14 +14,17 @@ #include "enetc.h" #include "enetc_logs.h" +#define ENETC_CACHE_LINE_RXBDS (RTE_CACHE_LINE_SIZE / \ + sizeof(union enetc_rx_bd)) #define ENETC_RXBD_BUNDLE 16 /* Number of buffers to allocate at once */ static int enetc_clean_tx_ring(struct enetc_bdr *tx_ring) { int tx_frm_cnt = 0; - struct enetc_swbd *tx_swbd; - int i, hwci; + struct enetc_swbd *tx_swbd, *tx_swbd_base; + int i, hwci, bd_count; + struct rte_mbuf *m[ENETC_RXBD_BUNDLE]; /* we don't need barriers here, we just want a relatively current value * from HW. @@ -29,8 +32,10 @@ enetc_clean_tx_ring(struct enetc_bdr *tx_ring) hwci = (int)(rte_read32_relaxed(tx_ring->tcisr) & ENETC_TBCISR_IDX_MASK); + tx_swbd_base = tx_ring->q_swbd; + bd_count = tx_ring->bd_count; i = tx_ring->next_to_clean; - tx_swbd = &tx_ring->q_swbd[i]; + tx_swbd = &tx_swbd_base[i]; /* we're only reading the CI index once here, which means HW may update * it while we're doing clean-up. We could read the register in a loop @@ -42,20 +47,33 @@ enetc_clean_tx_ring(struct enetc_bdr *tx_ring) * meantime. */ while (i != hwci) { - rte_pktmbuf_free(tx_swbd->buffer_addr); + /* It seems calling rte_pktmbuf_free is wasting a lot of cycles, + * make a list and call _free when it's done. + */ + if (tx_frm_cnt == ENETC_RXBD_BUNDLE) { + rte_pktmbuf_free_bulk(m, tx_frm_cnt); + tx_frm_cnt = 0; + } + + m[tx_frm_cnt] = tx_swbd->buffer_addr; tx_swbd->buffer_addr = NULL; - tx_swbd++; + i++; - if (unlikely(i == tx_ring->bd_count)) { + tx_swbd++; + if (unlikely(i == bd_count)) { i = 0; - tx_swbd = &tx_ring->q_swbd[0]; + tx_swbd = tx_swbd_base; } tx_frm_cnt++; } + if (tx_frm_cnt) + rte_pktmbuf_free_bulk(m, tx_frm_cnt); + tx_ring->next_to_clean = i; - return tx_frm_cnt++; + + return 0; } uint16_t @@ -234,7 +252,7 @@ static inline void enetc_slow_parsing(struct rte_mbuf *m, } -static inline void __attribute__((hot)) +static inline void __rte_hot enetc_dev_rx_parse(struct rte_mbuf *m, uint16_t parse_results) { ENETC_PMD_DP_DEBUG("parse summary = 0x%x ", parse_results); @@ -305,18 +323,37 @@ enetc_clean_rx_ring(struct enetc_bdr *rx_ring, int work_limit) { int rx_frm_cnt = 0; - int cleaned_cnt, i; + int cleaned_cnt, i, bd_count; struct enetc_swbd *rx_swbd; + union enetc_rx_bd *rxbd; - cleaned_cnt = enetc_bd_unused(rx_ring); /* next descriptor to process */ i = rx_ring->next_to_clean; + /* next descriptor to process */ + rxbd = ENETC_RXBD(*rx_ring, i); + rte_prefetch0(rxbd); + bd_count = rx_ring->bd_count; + /* LS1028A does not have platform cache so any software access following + * a hardware write will go directly to DDR. Latency of such a read is + * in excess of 100 core cycles, so try to prefetch more in advance to + * mitigate this. + * How much is worth prefetching really depends on traffic conditions. + * With congested Rx this could go up to 4 cache lines or so. But if + * software keeps up with hardware and follows behind Rx PI by a cache + * line or less then it's harmful in terms of performance to cache more. + * We would only prefetch BDs that have yet to be written by ENETC, + * which will have to be evicted again anyway. + */ + rte_prefetch0(ENETC_RXBD(*rx_ring, + (i + ENETC_CACHE_LINE_RXBDS) % bd_count)); + rte_prefetch0(ENETC_RXBD(*rx_ring, + (i + ENETC_CACHE_LINE_RXBDS * 2) % bd_count)); + + cleaned_cnt = enetc_bd_unused(rx_ring); rx_swbd = &rx_ring->q_swbd[i]; while (likely(rx_frm_cnt < work_limit)) { - union enetc_rx_bd *rxbd; uint32_t bd_status; - rxbd = ENETC_RXBD(*rx_ring, i); bd_status = rte_le_to_cpu_32(rxbd->r.lstatus); if (!bd_status) break; @@ -337,11 +374,18 @@ enetc_clean_rx_ring(struct enetc_bdr *rx_ring, i = 0; rx_swbd = &rx_ring->q_swbd[i]; } + rxbd = ENETC_RXBD(*rx_ring, i); + rte_prefetch0(ENETC_RXBD(*rx_ring, + (i + ENETC_CACHE_LINE_RXBDS) % + bd_count)); + rte_prefetch0(ENETC_RXBD(*rx_ring, + (i + ENETC_CACHE_LINE_RXBDS * 2) % + bd_count)); - rx_ring->next_to_clean = i; rx_frm_cnt++; } + rx_ring->next_to_clean = i; enetc_refill_rx_ring(rx_ring, cleaned_cnt); return rx_frm_cnt;