+/*
+ * Compute the address of the Tx push quick doorbell of one queue.
+ *
+ * The result is derived purely from the PCI memory resource of bar45
+ * (resource index 4) of the device backing 'dev':
+ *   base address + half of the resource length + 64KB * queue_id + 64B.
+ *
+ * No validation is done here: the caller is expected to invoke this only
+ * when the device supports Tx push, and to pass a valid queue_id.
+ */
+static void *
+hns3_tx_push_get_queue_tail_reg(struct rte_eth_dev *dev, uint16_t queue_id)
+{
+#define HNS3_TX_PUSH_TQP_REGION_SIZE 0x10000
+#define HNS3_TX_PUSH_QUICK_DOORBELL_OFFSET 64
+#define HNS3_TX_PUSH_PCI_BAR_INDEX 4
+
+	struct rte_pci_device *pci_dev = RTE_DEV_TO_PCI(dev->device);
+	uint8_t bar_id = HNS3_TX_PUSH_PCI_BAR_INDEX;
+	uint64_t tqp_offset;
+
+	/*
+	 * If the device supports Tx push then its PCIe bar45 must exist, and
+	 * the DPDK framework mmaps bar45 by default in the PCI probe stage.
+	 *
+	 * In bar45, the first half is for RoCE (RDMA over Converged
+	 * Ethernet) and the second half is for the NIC; every TQP occupies
+	 * 64KB.
+	 *
+	 * The quick doorbell is located at a 64B offset in the TQP region.
+	 */
+
+	/*
+	 * Do the multiplication in 64 bits: as a plain int expression,
+	 * 0x10000 * queue_id overflows a 32-bit int once queue_id reaches
+	 * 0x8000, which is undefined behaviour and would yield a wrong
+	 * (negative) offset.
+	 */
+	tqp_offset = (uint64_t)HNS3_TX_PUSH_TQP_REGION_SIZE * queue_id;
+
+	return (char *)pci_dev->mem_resource[bar_id].addr +
+		(pci_dev->mem_resource[bar_id].len >> 1) +
+		tqp_offset +
+		HNS3_TX_PUSH_QUICK_DOORBELL_OFFSET;
+}
+
+/*
+ * Device-level preparation for Tx push.
+ *
+ * Does nothing when the hardware does not report the TX_PUSH capability.
+ * Otherwise performs a single 32-bit read of the quick doorbell of queue 0
+ * and discards the value.
+ */
+void
+hns3_tx_push_init(struct rte_eth_dev *dev)
+{
+	struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+	if (hns3_dev_get_support(hw, TX_PUSH)) {
+		volatile uint32_t *doorbell;
+		uint32_t dummy;
+
+		doorbell = (volatile uint32_t *)
+				hns3_tx_push_get_queue_tail_reg(dev, 0);
+		/*
+		 * bar45 is about 8GB in size, so when working with vfio-pci
+		 * the page fault taken on first access in the Tx path may
+		 * take a long time. Touch the BAR with one read here, in the
+		 * init stage, so that the kernel sets up the page table
+		 * mapping for bar45 ahead of time.
+		 * Note: bar45 is readable, but the result is all 1.
+		 */
+		dummy = *doorbell;
+		RTE_SET_USED(dummy);
+	}
+}
+
+/*
+ * Per-queue Tx push setup.
+ *
+ * When the hardware reports the TX_PUSH capability, point the queue's
+ * io_tail_reg at the quick doorbell of 'queue_id' and mark Tx push as
+ * enabled. Otherwise only mark Tx push as disabled; io_tail_reg is left
+ * untouched in that case.
+ */
+static void
+hns3_tx_push_queue_init(struct rte_eth_dev *dev,
+			uint16_t queue_id,
+			struct hns3_tx_queue *txq)
+{
+	struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+	if (hns3_dev_get_support(hw, TX_PUSH)) {
+		void *doorbell = hns3_tx_push_get_queue_tail_reg(dev, queue_id);
+
+		txq->io_tail_reg = (volatile void *)doorbell;
+		txq->tx_push_enable = true;
+	} else {
+		txq->tx_push_enable = false;
+	}
+}
+