From 5c7ccb263b4881213f89037118e9fd9658253da3 Mon Sep 17 00:00:00 2001 From: Jerin Jacob Date: Fri, 14 Apr 2017 15:11:07 +0530 Subject: [PATCH] net/thunderx: reduce writes to mbuf With the mbuf rework, we now have 8 contiguous bytes to be rearmed in the mbuf at 8B naturally aligned address. Use single 8B write to avoid multiple 2B writes in Rx path. Signed-off-by: Jerin Jacob Signed-off-by: Maciej Czekaj --- drivers/net/thunderx/nicvf_ethdev.c | 18 ++++++++++++++++++ drivers/net/thunderx/nicvf_rxtx.c | 28 ++++++++++------------------ drivers/net/thunderx/nicvf_rxtx.h | 27 +++++++++++++++++++++++++++ drivers/net/thunderx/nicvf_struct.h | 11 +++++++++++ 4 files changed, 66 insertions(+), 18 deletions(-) diff --git a/drivers/net/thunderx/nicvf_ethdev.c b/drivers/net/thunderx/nicvf_ethdev.c index 36ff94f32b..e4910c9b34 100644 --- a/drivers/net/thunderx/nicvf_ethdev.c +++ b/drivers/net/thunderx/nicvf_ethdev.c @@ -1232,6 +1232,23 @@ nicvf_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t qidx) return nicvf_vf_stop_tx_queue(dev, nic, qidx); } +static inline void +nicvf_rxq_mbuf_setup(struct nicvf_rxq *rxq) +{ + uintptr_t p; + struct rte_mbuf mb_def; + + RTE_BUILD_BUG_ON(sizeof(union mbuf_initializer) != 8); + mb_def.nb_segs = 1; + mb_def.data_off = RTE_PKTMBUF_HEADROOM; + mb_def.port = rxq->port_id; + rte_mbuf_refcnt_set(&mb_def, 1); + + /* Prevent compiler reordering: rearm_data covers previous fields */ + rte_compiler_barrier(); + p = (uintptr_t)&mb_def.rearm_data; + rxq->mbuf_initializer.value = *(uint64_t *)p; +} static int nicvf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t qidx, @@ -1324,6 +1341,7 @@ nicvf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t qidx, else rxq->rbptr_offset = NICVF_CQE_RBPTR_WORD; + nicvf_rxq_mbuf_setup(rxq); /* Alloc completion queue */ if (nicvf_qset_cq_alloc(dev, nic, rxq, rxq->queue_id, nb_desc)) { diff --git a/drivers/net/thunderx/nicvf_rxtx.c b/drivers/net/thunderx/nicvf_rxtx.c index fc43b747a6..003ab06930 100644 --- a/drivers/net/thunderx/nicvf_rxtx.c +++ b/drivers/net/thunderx/nicvf_rxtx.c @@ -430,9 +430,9 @@ nicvf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) union cq_entry_t *desc = rxq->desc; const uint64_t cqe_mask = rxq->qlen_mask; uint64_t rb0_ptr, mbuf_phys_off = rxq->mbuf_phys_off; + const uint64_t mbuf_init = rxq->mbuf_initializer.value; uint32_t cqe_head = rxq->head & cqe_mask; int32_t available_space = rxq->available_space; - uint8_t port_id = rxq->port_id; const uint8_t rbptr_offset = rxq->rbptr_offset; to_process = nicvf_rx_pkts_to_process(rxq, nb_pkts, available_space); @@ -448,17 +448,12 @@ nicvf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) rb0_ptr = *((uint64_t *)cqe_rx + rbptr_offset); pkt = (struct rte_mbuf *)nicvf_mbuff_phy2virt (rb0_ptr - cqe_rx_w1.align_pad, mbuf_phys_off); - pkt->ol_flags = 0; - pkt->port = port_id; pkt->data_len = cqe_rx_w3.rb0_sz; - pkt->data_off = RTE_PKTMBUF_HEADROOM + cqe_rx_w1.align_pad; - pkt->nb_segs = 1; pkt->pkt_len = cqe_rx_w3.rb0_sz; pkt->packet_type = nicvf_rx_classify_pkt(cqe_rx_w0); - + nicvf_mbuff_init_update(pkt, mbuf_init, cqe_rx_w1.align_pad); nicvf_rx_offload(cqe_rx_w0, cqe_rx_w2, pkt); - rte_mbuf_refcnt_set(pkt, 1); rx_pkts[i] = pkt; cqe_head = (cqe_head + 1) & cqe_mask; nicvf_prefetch_store_keep(pkt); @@ -481,8 +476,9 @@ nicvf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) static inline uint16_t __hot nicvf_process_cq_mseg_entry(struct cqe_rx_t *cqe_rx, - uint64_t mbuf_phys_off, uint8_t port_id, - struct rte_mbuf **rx_pkt, uint8_t rbptr_offset) + uint64_t mbuf_phys_off, + struct rte_mbuf **rx_pkt, uint8_t rbptr_offset, + uint64_t mbuf_init) { struct rte_mbuf *pkt, *seg, *prev; cqe_rx_word0_t cqe_rx_w0; @@ -501,12 +497,10 @@ nicvf_process_cq_mseg_entry(struct cqe_rx_t *cqe_rx, (rb_ptr[0] - cqe_rx_w1.align_pad, mbuf_phys_off); pkt->ol_flags = 0; - pkt->port = port_id; - pkt->data_off = RTE_PKTMBUF_HEADROOM + cqe_rx_w1.align_pad; - pkt->nb_segs = nb_segs; pkt->pkt_len = cqe_rx_w1.pkt_len; pkt->data_len = rb_sz[nicvf_frag_num(0)]; - rte_mbuf_refcnt_set(pkt, 1); + nicvf_mbuff_init_mseg_update( + pkt, mbuf_init, cqe_rx_w1.align_pad, nb_segs); pkt->packet_type = nicvf_rx_classify_pkt(cqe_rx_w0); nicvf_rx_offload(cqe_rx_w0, cqe_rx_w2, pkt); @@ -518,9 +512,7 @@ nicvf_process_cq_mseg_entry(struct cqe_rx_t *cqe_rx, prev->next = seg; seg->data_len = rb_sz[nicvf_frag_num(seg_idx)]; - seg->port = port_id; - seg->data_off = RTE_PKTMBUF_HEADROOM; - rte_mbuf_refcnt_set(seg, 1); + nicvf_mbuff_init_update(seg, mbuf_init, 0); prev = seg; } @@ -541,7 +533,7 @@ nicvf_recv_pkts_multiseg(void *rx_queue, struct rte_mbuf **rx_pkts, uint32_t i, to_process, cqe_head, buffers_consumed = 0; int32_t available_space = rxq->available_space; uint16_t nb_segs; - const uint8_t port_id = rxq->port_id; + const uint64_t mbuf_init = rxq->mbuf_initializer.value; const uint8_t rbptr_offset = rxq->rbptr_offset; cqe_head = rxq->head & cqe_mask; @@ -552,7 +544,7 @@ nicvf_recv_pkts_multiseg(void *rx_queue, struct rte_mbuf **rx_pkts, cq_entry = &desc[cqe_head]; cqe_rx = (struct cqe_rx_t *)cq_entry; nb_segs = nicvf_process_cq_mseg_entry(cqe_rx, mbuf_phys_off, - port_id, rx_pkts + i, rbptr_offset); + rx_pkts + i, rbptr_offset, mbuf_init); buffers_consumed += nb_segs; cqe_head = (cqe_head + 1) & cqe_mask; nicvf_prefetch_store_keep(rx_pkts[i]); diff --git a/drivers/net/thunderx/nicvf_rxtx.h b/drivers/net/thunderx/nicvf_rxtx.h index 9dad8a5abf..3631ff22f1 100644 --- a/drivers/net/thunderx/nicvf_rxtx.h +++ b/drivers/net/thunderx/nicvf_rxtx.h @@ -84,6 +84,33 @@ fill_sq_desc_gather(union sq_entry_t *entry, struct rte_mbuf *pkt) } #endif +static inline void +nicvf_mbuff_init_update(struct rte_mbuf *pkt, const uint64_t mbuf_init, + uint16_t apad) +{ + union mbuf_initializer init = {.value = mbuf_init}; +#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN + init.fields.data_off += apad; +#else + init.value += apad; +#endif + *(uint64_t *)(&pkt->rearm_data) = init.value; +} + +static inline void +nicvf_mbuff_init_mseg_update(struct rte_mbuf *pkt, const uint64_t mbuf_init, + uint16_t apad, uint16_t nb_segs) +{ + union mbuf_initializer init = {.value = mbuf_init}; +#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN + init.fields.data_off += apad; +#else + init.value += apad; +#endif + init.fields.nb_segs = nb_segs; + *(uint64_t *)(&pkt->rearm_data) = init.value; +} + uint32_t nicvf_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t queue_idx); uint32_t nicvf_dev_rbdr_refill(struct rte_eth_dev *dev, uint16_t queue_idx); diff --git a/drivers/net/thunderx/nicvf_struct.h b/drivers/net/thunderx/nicvf_struct.h index 5bc6d57739..34c41b7912 100644 --- a/drivers/net/thunderx/nicvf_struct.h +++ b/drivers/net/thunderx/nicvf_struct.h @@ -72,10 +72,21 @@ struct nicvf_txq { uint16_t tx_free_thresh; } __rte_cache_aligned; +union mbuf_initializer { + struct { + uint16_t data_off; + uint16_t refcnt; + uint16_t nb_segs; + uint16_t port; + } fields; + uint64_t value; +}; + struct nicvf_rxq { uint64_t mbuf_phys_off; uintptr_t cq_status; uintptr_t cq_door; + union mbuf_initializer mbuf_initializer; nicvf_phys_addr_t phys; union cq_entry_t *desc; struct nicvf_rbdr *shared_rbdr; -- 2.20.1