+/*
+ * Bulk-free transmitted mbufs on the simple Tx path. Descriptors are
+ * reclaimed in batches of tx_rs_thresh: a batch of mbufs may be released
+ * only after the hardware has cleared the VLD bit of every descriptor in
+ * the batch.
+ */
+static inline void
+hns3_tx_free_buffer_simple(struct hns3_tx_queue *txq)
+{
+	struct hns3_entry *tx_entry;
+	struct hns3_desc *desc;
+	uint16_t tx_next_clean;
+	int i;
+
+	while (1) {
+		if (HNS3_GET_TX_QUEUE_PEND_BD_NUM(txq) < txq->tx_rs_thresh)
+			break;
+
+		/*
+		 * All mbufs can be released only when the VLD bits of all
+		 * descriptors in a batch are cleared.
+		 */
+		tx_next_clean = (txq->next_to_clean + txq->tx_rs_thresh - 1) %
+				txq->nb_tx_desc;
+		desc = &txq->tx_ring[tx_next_clean];
+		for (i = 0; i < txq->tx_rs_thresh; i++) {
+			if (rte_le_to_cpu_16(desc->tx.tp_fe_sc_vld_ra_ri) &
+			    BIT(HNS3_TXD_VLD_B))
+				return;
+			desc--;
+		}
+
+		tx_entry = &txq->sw_ring[txq->next_to_clean];
+
+		for (i = 0; i < txq->tx_rs_thresh; i++)
+			rte_prefetch0((tx_entry + i)->mbuf);
+		for (i = 0; i < txq->tx_rs_thresh; i++, tx_entry++) {
+			rte_mempool_put(tx_entry->mbuf->pool, tx_entry->mbuf);
+			tx_entry->mbuf = NULL;
+		}
+
+		txq->next_to_clean = (tx_next_clean + 1) % txq->nb_tx_desc;
+		txq->tx_bd_ready += txq->tx_rs_thresh;
+	}
+}
+
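+/*
+ * Record one mbuf in the Tx software ring so that it can be freed by
+ * hns3_tx_free_buffer_simple() once the hardware has transmitted it.
+ */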
+static inline void
+hns3_tx_backup_1mbuf(struct hns3_entry *tx_entry, struct rte_mbuf **pkts)
+{
+	tx_entry->mbuf = pkts[0];
+}
+
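+/* Unrolled variant of hns3_tx_backup_1mbuf() for a batch of four mbufs. */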
+static inline void
+hns3_tx_backup_4mbuf(struct hns3_entry *tx_entry, struct rte_mbuf **pkts)
+{
+	hns3_tx_backup_1mbuf(&tx_entry[0], &pkts[0]);
+	hns3_tx_backup_1mbuf(&tx_entry[1], &pkts[1]);
+	hns3_tx_backup_1mbuf(&tx_entry[2], &pkts[2]);
+	hns3_tx_backup_1mbuf(&tx_entry[3], &pkts[3]);
+}
+
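+/*
+ * Fill four consecutive Tx buffer descriptors from single-segment mbufs.
+ * On this path each packet occupies exactly one BD, so both the FE (frame
+ * end) and VLD (valid) bits are set on every descriptor.
+ */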
+static inline void
+hns3_tx_setup_4bd(struct hns3_desc *txdp, struct rte_mbuf **pkts)
+{
+#define PER_LOOP_NUM 4
+	const uint16_t bd_flag = BIT(HNS3_TXD_VLD_B) | BIT(HNS3_TXD_FE_B);
+	uint64_t dma_addr;
+	uint32_t i;
+
+	for (i = 0; i < PER_LOOP_NUM; i++, txdp++, pkts++) {
+		dma_addr = rte_mbuf_data_iova(*pkts);
+		txdp->addr = rte_cpu_to_le_64(dma_addr);
+		txdp->tx.send_size = rte_cpu_to_le_16((*pkts)->data_len);
+		txdp->tx.paylen = 0;
+		txdp->tx.type_cs_vlan_tso_len = 0;
+		txdp->tx.ol_type_vlan_len_msec = 0;
+		txdp->tx.tp_fe_sc_vld_ra_ri = rte_cpu_to_le_16(bd_flag);
+	}
+}
+
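+/*
+ * Fill a single Tx buffer descriptor; scalar counterpart of
+ * hns3_tx_setup_4bd(), used for the leftover tail of a burst.
+ */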
+static inline void
+hns3_tx_setup_1bd(struct hns3_desc *txdp, struct rte_mbuf **pkts)
+{
+	const uint16_t bd_flag = BIT(HNS3_TXD_VLD_B) | BIT(HNS3_TXD_FE_B);
+	uint64_t dma_addr;
+
+	dma_addr = rte_mbuf_data_iova(*pkts);
+	txdp->addr = rte_cpu_to_le_64(dma_addr);
+	txdp->tx.send_size = rte_cpu_to_le_16((*pkts)->data_len);
+	txdp->tx.paylen = 0;
+	txdp->tx.type_cs_vlan_tso_len = 0;
+	txdp->tx.ol_type_vlan_len_msec = 0;
+	txdp->tx.tp_fe_sc_vld_ra_ri = rte_cpu_to_le_16(bd_flag);
+}
+
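+/*
+ * Write nb_pkts descriptors into the hardware ring starting at next_to_use,
+ * mirroring each mbuf into the software ring. The main part is handled
+ * four packets at a time; the leftover (nb_pkts modulo 4) is written one
+ * descriptor at a time. The caller guarantees that the range does not wrap
+ * past the end of the ring.
+ */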
+static inline void
+hns3_tx_fill_hw_ring(struct hns3_tx_queue *txq,
+		     struct rte_mbuf **pkts,
+		     uint16_t nb_pkts)
+{
+#define PER_LOOP_NUM 4
+#define PER_LOOP_MASK (PER_LOOP_NUM - 1)
+	struct hns3_desc *txdp = &txq->tx_ring[txq->next_to_use];
+	struct hns3_entry *tx_entry = &txq->sw_ring[txq->next_to_use];
+	const uint32_t mainpart = (nb_pkts & ((uint32_t)~PER_LOOP_MASK));
+	const uint32_t leftover = (nb_pkts & ((uint32_t)PER_LOOP_MASK));
+	uint32_t i;
+
+	for (i = 0; i < mainpart; i += PER_LOOP_NUM) {
+		hns3_tx_backup_4mbuf(tx_entry + i, pkts + i);
+		hns3_tx_setup_4bd(txdp + i, pkts + i);
+	}
+	if (unlikely(leftover > 0)) {
+		for (i = 0; i < leftover; i++) {
+			hns3_tx_backup_1mbuf(tx_entry + mainpart + i,
+					     pkts + mainpart + i);
+			hns3_tx_setup_1bd(txdp + mainpart + i,
+					  pkts + mainpart + i);
+		}
+	}
+}
+
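+/*
+ * Simple Tx burst function: each packet is a single-segment mbuf that
+ * consumes exactly one descriptor, with no offloads applied. Completed
+ * descriptors are reclaimed first, the burst is clipped to the number of
+ * free BDs, the descriptors are written (split in two at the ring wrap
+ * point if necessary), and the doorbell is rung once via the tail register.
+ */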
+uint16_t
+hns3_xmit_pkts_simple(void *tx_queue,
+		      struct rte_mbuf **tx_pkts,
+		      uint16_t nb_pkts)
+{
+	struct hns3_tx_queue *txq = tx_queue;
+	uint16_t nb_tx = 0;
+
+	hns3_tx_free_buffer_simple(txq);
+
+	nb_pkts = RTE_MIN(txq->tx_bd_ready, nb_pkts);
+	if (unlikely(nb_pkts == 0)) {
+		if (txq->tx_bd_ready == 0)
+			txq->queue_full_cnt++;
+		return 0;
+	}
+
+	txq->tx_bd_ready -= nb_pkts;
+	/*
+	 * If the burst would run past the end of the ring, fill descriptors
+	 * up to the boundary first, then wrap next_to_use back to zero.
+	 * Using ">=" keeps next_to_use within [0, nb_tx_desc) even when the
+	 * burst ends exactly at the ring boundary.
+	 */
+	if (txq->next_to_use + nb_pkts >= txq->nb_tx_desc) {
+		nb_tx = txq->nb_tx_desc - txq->next_to_use;
+		hns3_tx_fill_hw_ring(txq, tx_pkts, nb_tx);
+		txq->next_to_use = 0;
+	}
+
+	hns3_tx_fill_hw_ring(txq, tx_pkts + nb_tx, nb_pkts - nb_tx);
+	txq->next_to_use += nb_pkts - nb_tx;
+
+	hns3_write_reg_opt(txq->io_tail_reg, nb_pkts);
+
+	return nb_pkts;
+}
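+/*
+ * Usage sketch (illustrative, not part of the driver): once this burst
+ * function has been installed on a queue, an application reaches it through
+ * the standard ethdev API, e.g.:
+ *
+ *	nb_sent = rte_eth_tx_burst(port_id, queue_id, tx_pkts, nb_pkts);
+ *
+ * How the driver selects hns3_xmit_pkts_simple over the full-featured Tx
+ * path is outside this hunk.
+ */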
+