From: Pavan Nikhilesh
Date: Thu, 8 Oct 2020 18:48:44 +0000 (+0530)
Subject: event/octeontx2: improve single flow performance
X-Git-Url: http://git.droids-corp.org/?a=commitdiff_plain;h=cb7ee83b63657b4c519c35570f85c748462a100a;p=dpdk.git

event/octeontx2: improve single flow performance

Improve single flow performance by moving the point of coherence
to the end of transmit sequence.

Signed-off-by: Pavan Nikhilesh
---

diff --git a/drivers/event/octeontx2/otx2_worker.h b/drivers/event/octeontx2/otx2_worker.h
index 771f75efd7..c38de99fe1 100644
--- a/drivers/event/octeontx2/otx2_worker.h
+++ b/drivers/event/octeontx2/otx2_worker.h
@@ -255,15 +255,6 @@ otx2_ssogws_head_wait(struct otx2_ssogws *ws)
 #endif
 }
 
-static __rte_always_inline void
-otx2_ssogws_order(struct otx2_ssogws *ws, const uint8_t wait_flag)
-{
-	if (wait_flag)
-		otx2_ssogws_head_wait(ws);
-
-	rte_io_wmb();
-}
-
 static __rte_always_inline const struct otx2_eth_txq *
 otx2_ssogws_xtract_meta(struct rte_mbuf *m,
 			const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT])
@@ -295,10 +286,9 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event ev[],
 		return otx2_sec_event_tx(ws, ev, m, txq, flags);
 	}
 
-	rte_prefetch_non_temporal(&txq_data[m->port][0]);
 	/* Perform header writes before barrier for TSO */
 	otx2_nix_xmit_prepare_tso(m, flags);
-	otx2_ssogws_order(ws, !ev->sched_type);
+	rte_io_wmb();
 	txq = otx2_ssogws_xtract_meta(m, txq_data);
 	otx2_ssogws_prepare_pkt(txq, m, cmd, flags);
 
@@ -306,12 +296,31 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event ev[],
 		const uint16_t segdw = otx2_nix_prepare_mseg(m, cmd, flags);
 		otx2_nix_xmit_prepare_tstamp(cmd, &txq->cmd[0],
 					     m->ol_flags, segdw, flags);
-		otx2_nix_xmit_mseg_one(cmd, txq->lmt_addr, txq->io_addr, segdw);
+		if (!ev->sched_type) {
+			otx2_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, segdw);
+			otx2_ssogws_head_wait(ws);
+			if (otx2_nix_xmit_submit_lmt(txq->io_addr) == 0)
+				otx2_nix_xmit_mseg_one(cmd, txq->lmt_addr,
+						       txq->io_addr, segdw);
+		} else {
+			otx2_nix_xmit_mseg_one(cmd, txq->lmt_addr, txq->io_addr,
+					       segdw);
+		}
 	} else {
 		/* Passing no of segdw as 4: HDR + EXT + SG + SMEM */
 		otx2_nix_xmit_prepare_tstamp(cmd, &txq->cmd[0],
 					     m->ol_flags, 4, flags);
-		otx2_nix_xmit_one(cmd, txq->lmt_addr, txq->io_addr, flags);
+
+		if (!ev->sched_type) {
+			otx2_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags);
+			otx2_ssogws_head_wait(ws);
+			if (otx2_nix_xmit_submit_lmt(txq->io_addr) == 0)
+				otx2_nix_xmit_one(cmd, txq->lmt_addr,
+						  txq->io_addr, flags);
+		} else {
+			otx2_nix_xmit_one(cmd, txq->lmt_addr, txq->io_addr,
+					  flags);
+		}
 	}
 
 	otx2_write64(0, ws->swtag_flush_op);
diff --git a/drivers/net/octeontx2/otx2_tx.h b/drivers/net/octeontx2/otx2_tx.h
index 3c43170920..caf170fd1a 100644
--- a/drivers/net/octeontx2/otx2_tx.h
+++ b/drivers/net/octeontx2/otx2_tx.h
@@ -383,6 +383,18 @@ otx2_nix_xmit_one(uint64_t *cmd, void *lmt_addr,
 	} while (lmt_status == 0);
 }
 
+static __rte_always_inline void
+otx2_nix_xmit_prep_lmt(uint64_t *cmd, void *lmt_addr, const uint32_t flags)
+{
+	otx2_lmt_mov(lmt_addr, cmd, otx2_nix_tx_ext_subs(flags));
+}
+
+static __rte_always_inline uint64_t
+otx2_nix_xmit_submit_lmt(const rte_iova_t io_addr)
+{
+	return otx2_lmt_submit(io_addr);
+}
+
 static __rte_always_inline uint16_t
 otx2_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
 {
@@ -453,6 +465,12 @@ otx2_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
 	return segdw;
 }
 
+static __rte_always_inline void
+otx2_nix_xmit_mseg_prep_lmt(uint64_t *cmd, void *lmt_addr, uint16_t segdw)
+{
+	otx2_lmt_mov_seg(lmt_addr, (const void *)cmd, segdw);
+}
+
 static __rte_always_inline void
 otx2_nix_xmit_mseg_one(uint64_t *cmd, void *lmt_addr,
 		       rte_iova_t io_addr, uint16_t segdw)