+ return true;
+ }
+ *l4_proto = ipv6_hdr->proto;
+ return false;
+}
+
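+ /*
+ * Prepare the outer header for tunnel packets: if the outer L3 checksum
+ * has already been prepared there is nothing more to do; otherwise, for
+ * UDP tunnel TSO packets whose payload spans more than one segment,
+ * clear the outer UDP checksum as required by the hardware.
+ */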
+static void
+hns3_outer_header_cksum_prepare(struct rte_mbuf *m)
+{
+ uint64_t ol_flags = m->ol_flags;
+ uint32_t paylen, hdr_len, l4_proto;
+ struct rte_udp_hdr *udp_hdr;
+
+ if (!(ol_flags & (PKT_TX_OUTER_IPV4 | PKT_TX_OUTER_IPV6)))
+ return;
+
+ if (ol_flags & PKT_TX_OUTER_IPV4) {
+ if (hns3_outer_ipv4_cksum_prepared(m, ol_flags, &l4_proto))
+ return;
+ } else {
+ if (hns3_outer_ipv6_cksum_prepared(m, ol_flags, &l4_proto))
+ return;
+ }
+
+ /* The driver must ensure the outer UDP checksum is 0 for tunnel TSO */
+ if (l4_proto == IPPROTO_UDP && (ol_flags & PKT_TX_TCP_SEG)) {
+ hdr_len = m->l2_len + m->l3_len + m->l4_len;
+ hdr_len += m->outer_l2_len + m->outer_l3_len;
+ paylen = m->pkt_len - hdr_len;
+ if (paylen <= m->tso_segsz)
+ return;
+ udp_hdr = rte_pktmbuf_mtod_offset(m, struct rte_udp_hdr *,
+ m->outer_l2_len +
+ m->outer_l3_len);
+ udp_hdr->dgram_cksum = 0;
+ }
+}
+
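+ /*
+ * Validate a TSO packet against the hns3 hardware limits: the number of
+ * BDs, the total header length, the payload length, and whether the
+ * complete header fits into the first HNS3_MAX_TSO_HDR_BD_NUM segments.
+ * Returns 0 on success and -EINVAL otherwise.
+ */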
+static int
+hns3_check_tso_pkt_valid(struct rte_mbuf *m)
+{
+ uint32_t tmp_data_len_sum = 0;
+ uint16_t nb_buf = m->nb_segs;
+ uint32_t paylen, hdr_len;
+ struct rte_mbuf *m_seg;
+ int i;
+
+ if (nb_buf > HNS3_MAX_TSO_BD_PER_PKT)
+ return -EINVAL;
+
+ hdr_len = m->l2_len + m->l3_len + m->l4_len;
+ hdr_len += (m->ol_flags & PKT_TX_TUNNEL_MASK) ?
+ m->outer_l2_len + m->outer_l3_len : 0;
+ if (hdr_len > HNS3_MAX_TSO_HDR_SIZE)
+ return -EINVAL;
+
+ paylen = m->pkt_len - hdr_len;
+ if (paylen > HNS3_MAX_BD_PAYLEN)
+ return -EINVAL;
+
+ /*
+ * In the hns3 network engine the TSO header (including the outer and
+ * inner L2, L3 and L4 headers) must be carried by at most three
+ * descriptors (HNS3_MAX_TSO_HDR_BD_NUM), so check that the first
+ * segments contain the complete header.
+ */
+ m_seg = m;
+ for (i = 0; m_seg != NULL && i < HNS3_MAX_TSO_HDR_BD_NUM && i < nb_buf;
+ i++, m_seg = m_seg->next) {
+ tmp_data_len_sum += m_seg->data_len;
+ }
+
+ if (hdr_len > tmp_data_len_sum)
+ return -EINVAL;
+
+ return 0;
+}
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+static inline int
+hns3_vld_vlan_chk(struct hns3_tx_queue *txq, struct rte_mbuf *m)
+{
+ struct rte_ether_hdr *eh;
+ struct rte_vlan_hdr *vh;
+
+ if (!txq->pvid_sw_shift_en)
+ return 0;
+
+ /*
+ * Due to hardware limitations, the hns3 network engine supports at most
+ * two VLAN layers for hardware offload in the Tx direction, so QinQ
+ * insertion is not supported while PVID is enabled. Moreover, when PVID
+ * is enabled, packets in the following two cases are regarded as
+ * abnormal and discarded by the hardware in the Tx direction:
+ * i) packets with more than two VLAN tags.
+ * ii) packets with one VLAN tag while hardware VLAN insertion is
+ * enabled.
+ * For debugging purposes, this check is added to the '.tx_pkt_prepare'
+ * ops implementation (hns3_prep_pkts) so that users are informed that
+ * such packets will be discarded.
+ */
+ if (m->ol_flags & PKT_TX_QINQ_PKT)
+ return -EINVAL;
+
+ eh = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
+ if (eh->ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN)) {
+ if (m->ol_flags & PKT_TX_VLAN_PKT)
+ return -EINVAL;
+
+ /* Ensure the incoming packet is not a QinQ packet */
+ vh = (struct rte_vlan_hdr *)(eh + 1);
+ if (vh->eth_proto == rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN))
+ return -EINVAL;
+ }
+
+ return 0;
+}
+#endif
+
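+ /*
+ * Compute the UDP checksum in software over the L4 header and payload.
+ * This helper relies on the pseudo-header checksum already having been
+ * written into the UDP checksum field (as done earlier by
+ * rte_net_intel_cksum_prepare()), so the raw sum over the L4 region
+ * yields the final checksum.
+ */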
+static uint16_t
+hns3_udp_cksum_help(struct rte_mbuf *m)
+{
+ uint64_t ol_flags = m->ol_flags;
+ uint16_t cksum = 0;
+ uint32_t l4_len;
+
+ if (ol_flags & PKT_TX_IPV4) {
+ struct rte_ipv4_hdr *ipv4_hdr = rte_pktmbuf_mtod_offset(m,
+ struct rte_ipv4_hdr *, m->l2_len);
+ l4_len = rte_be_to_cpu_16(ipv4_hdr->total_length) - m->l3_len;
+ } else {
+ struct rte_ipv6_hdr *ipv6_hdr = rte_pktmbuf_mtod_offset(m,
+ struct rte_ipv6_hdr *, m->l2_len);
+ l4_len = rte_be_to_cpu_16(ipv6_hdr->payload_len);
+ }
+
+ rte_raw_cksum_mbuf(m, m->l2_len + m->l3_len, l4_len, &cksum);
+
+ cksum = ~cksum;
+ /*
+ * RFC 768: if the computed checksum is zero for UDP, it is transmitted
+ * as all ones.
+ */
+ if (cksum == 0)
+ cksum = 0xffff;
+
+ return (uint16_t)cksum;
+}
+
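+ /*
+ * Return true if the hardware can offload the L4 checksum as requested.
+ * Return false if the UDP checksum has been completed in software here
+ * (and the L4 offload flag cleared), in which case the caller skips the
+ * remaining checksum preparation.
+ */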
+static bool
+hns3_validate_tunnel_cksum(struct hns3_tx_queue *tx_queue, struct rte_mbuf *m)
+{
+ uint64_t ol_flags = m->ol_flags;
+ struct rte_udp_hdr *udp_hdr;
+ uint16_t dst_port;
+
+ if (tx_queue->udp_cksum_mode == HNS3_SPECIAL_PORT_HW_CKSUM_MODE ||
+ ol_flags & PKT_TX_TUNNEL_MASK ||
+ (ol_flags & PKT_TX_L4_MASK) != PKT_TX_UDP_CKSUM)
+ return true;
+ /*
+ * A UDP packet whose dst_port matches the VXLAN/VXLAN-GPE/GENEVE
+ * default port is recognized by the hardware as a tunnel packet. In
+ * that case, if UDP checksum offload is requested but no tunnel flag is
+ * set, the hardware uses the wrong header length and produces an
+ * incorrect checksum, so the driver completes the checksum in software
+ * to avoid the error.
+ */
+ udp_hdr = rte_pktmbuf_mtod_offset(m, struct rte_udp_hdr *,
+ m->l2_len + m->l3_len);
+ dst_port = rte_be_to_cpu_16(udp_hdr->dst_port);
+ switch (dst_port) {
+ case RTE_VXLAN_DEFAULT_PORT:
+ case RTE_VXLAN_GPE_DEFAULT_PORT:
+ case RTE_GENEVE_DEFAULT_PORT:
+ udp_hdr->dgram_cksum = hns3_udp_cksum_help(m);
+ m->ol_flags = ol_flags & ~PKT_TX_L4_MASK;
+ return false;
+ default:
+ return true;
+ }
+}
+
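+ /*
+ * Per-packet processing for the '.tx_pkt_prepare' path: validate the
+ * offload request, check the TSO limits, and prepare the checksums that
+ * the hardware cannot calculate itself. Sets rte_errno and returns a
+ * negative value on failure.
+ */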
+static int
+hns3_prep_pkt_proc(struct hns3_tx_queue *tx_queue, struct rte_mbuf *m)
+{
+ int ret;
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+ ret = rte_validate_tx_offload(m);
+ if (ret != 0) {
+ rte_errno = -ret;
+ return ret;
+ }
+
+ ret = hns3_vld_vlan_chk(tx_queue, m);
+ if (ret != 0) {
+ rte_errno = EINVAL;
+ return ret;
+ }
+#endif
+ if (hns3_pkt_is_tso(m)) {
+ if (hns3_pkt_need_linearized(m, m->nb_segs,
+ tx_queue->max_non_tso_bd_num) ||
+ hns3_check_tso_pkt_valid(m)) {
+ rte_errno = EINVAL;
+ return -EINVAL;
+ }
+
+ if (tx_queue->tso_mode != HNS3_TSO_SW_CAL_PSEUDO_H_CSUM) {
+ /*
+ * (tso mode != HNS3_TSO_SW_CAL_PSEUDO_H_CSUM) means the hardware
+ * can recalculate the TCP pseudo-header checksum of packets that
+ * need TSO, so the driver does not need to recalculate it in
+ * software.
+ */
+ hns3_outer_header_cksum_prepare(m);
+ return 0;
+ }
+ }
+
+ ret = rte_net_intel_cksum_prepare(m);
+ if (ret != 0) {
+ rte_errno = -ret;
+ return ret;
+ }
+
+ if (!hns3_validate_tunnel_cksum(tx_queue, m))
+ return 0;
+
+ hns3_outer_header_cksum_prepare(m);
+
+ return 0;
+}
+
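+ /*
+ * '.tx_pkt_prepare' ops implementation. Processing stops at the first
+ * invalid packet: the number of packets successfully prepared is
+ * returned and rte_errno is set for the failed one.
+ */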
+uint16_t
+hns3_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
+ uint16_t nb_pkts)
+{
+ struct rte_mbuf *m;
+ uint16_t i;
+
+ for (i = 0; i < nb_pkts; i++) {
+ m = tx_pkts[i];
+ if (hns3_prep_pkt_proc(tx_queue, m))
+ return i;
+ }
+
+ return i;
+}
+
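+ /*
+ * Fill the checksum offload fields of the Tx descriptor, including the
+ * tunneling parameters when needed, or clear them when no checksum
+ * offload is requested for this mbuf.
+ */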
+static int
+hns3_parse_cksum(struct hns3_tx_queue *txq, uint16_t tx_desc_id,
+ struct rte_mbuf *m)
+{
+ struct hns3_desc *tx_ring = txq->tx_ring;
+ struct hns3_desc *desc = &tx_ring[tx_desc_id];
+
+ /* Enable checksum offloading */
+ if (m->ol_flags & HNS3_TX_CKSUM_OFFLOAD_MASK) {
+ /* Fill in tunneling parameters if necessary */
+ if (hns3_parse_tunneling_params(txq, m, tx_desc_id)) {
+ txq->dfx_stats.unsupported_tunnel_pkt_cnt++;
+ return -EINVAL;
+ }
+
+ hns3_txd_enable_checksum(txq, m, tx_desc_id);
+ } else {
+ /* clear the checksum and offload control fields */
+ desc->tx.type_cs_vlan_tso_len = 0;
+ desc->tx.ol_type_vlan_len_msec = 0;
+ }
+
+ return 0;
+}
+
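+ /*
+ * For non-TSO packets, drop frames longer than HNS3_MAX_FRAME_LEN and,
+ * when a packet needs more BDs than the hardware supports, try to
+ * reassemble it into fewer segments; *m_seg is updated to the
+ * reassembled mbuf on success.
+ */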
+static int
+hns3_check_non_tso_pkt(uint16_t nb_buf, struct rte_mbuf **m_seg,
+ struct rte_mbuf *tx_pkt, struct hns3_tx_queue *txq)
+{
+ uint8_t max_non_tso_bd_num;
+ struct rte_mbuf *new_pkt;
+ int ret;
+
+ if (hns3_pkt_is_tso(*m_seg))
+ return 0;
+
+ /*
+ * If the packet length is greater than HNS3_MAX_FRAME_LEN, the
+ * maximum frame length the driver supports, the packet is dropped.
+ */
+ if (unlikely(rte_pktmbuf_pkt_len(tx_pkt) > HNS3_MAX_FRAME_LEN)) {
+ txq->dfx_stats.over_length_pkt_cnt++;
+ return -EINVAL;
+ }
+
+ max_non_tso_bd_num = txq->max_non_tso_bd_num;
+ if (unlikely(nb_buf > max_non_tso_bd_num)) {
+ txq->dfx_stats.exceed_limit_bd_pkt_cnt++;
+ ret = hns3_reassemble_tx_pkts(tx_pkt, &new_pkt,
+ max_non_tso_bd_num);
+ if (ret) {
+ txq->dfx_stats.exceed_limit_bd_reassem_fail++;
+ return ret;
+ }
+ *m_seg = new_pkt;
+ }
+
+ return 0;
+}
+
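+ /*
+ * Bulk-free transmitted mbufs for the simple Tx path: release
+ * tx_rs_thresh mbufs at a time once the hardware has cleared the VLD
+ * bit of every descriptor in the batch.
+ */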
+static inline void
+hns3_tx_free_buffer_simple(struct hns3_tx_queue *txq)
+{
+ struct hns3_entry *tx_entry;
+ struct hns3_desc *desc;
+ uint16_t tx_next_clean;
+ int i;
+
+ while (1) {
+ if (HNS3_GET_TX_QUEUE_PEND_BD_NUM(txq) < txq->tx_rs_thresh)
+ break;
+
+ /*
+ * The mbufs of a batch can be released only after hardware has
+ * cleared the VLD bit of every descriptor in the batch.
+ */
+ tx_next_clean = (txq->next_to_clean + txq->tx_rs_thresh - 1) %
+ txq->nb_tx_desc;
+ desc = &txq->tx_ring[tx_next_clean];
+ for (i = 0; i < txq->tx_rs_thresh; i++) {
+ if (rte_le_to_cpu_16(desc->tx.tp_fe_sc_vld_ra_ri) &
+ BIT(HNS3_TXD_VLD_B))
+ return;
+ desc--;
+ }
+
+ tx_entry = &txq->sw_ring[txq->next_to_clean];
+
+ for (i = 0; i < txq->tx_rs_thresh; i++)
+ rte_prefetch0((tx_entry + i)->mbuf);
+ for (i = 0; i < txq->tx_rs_thresh; i++, tx_entry++) {
+ rte_mempool_put(tx_entry->mbuf->pool, tx_entry->mbuf);
+ tx_entry->mbuf = NULL;
+ }
+
+ txq->next_to_clean = (tx_next_clean + 1) % txq->nb_tx_desc;
+ txq->tx_bd_ready += txq->tx_rs_thresh;
+ }
+}
+
+static inline void
+hns3_tx_backup_1mbuf(struct hns3_entry *tx_entry, struct rte_mbuf **pkts)
+{
+ tx_entry->mbuf = pkts[0];
+}
+
+static inline void
+hns3_tx_backup_4mbuf(struct hns3_entry *tx_entry, struct rte_mbuf **pkts)
+{
+ hns3_tx_backup_1mbuf(&tx_entry[0], &pkts[0]);
+ hns3_tx_backup_1mbuf(&tx_entry[1], &pkts[1]);
+ hns3_tx_backup_1mbuf(&tx_entry[2], &pkts[2]);
+ hns3_tx_backup_1mbuf(&tx_entry[3], &pkts[3]);
+}
+
+static inline void
+hns3_tx_setup_4bd(struct hns3_desc *txdp, struct rte_mbuf **pkts)
+{
+#define PER_LOOP_NUM 4
+ const uint16_t bd_flag = BIT(HNS3_TXD_VLD_B) | BIT(HNS3_TXD_FE_B);
+ uint64_t dma_addr;
+ uint32_t i;
+
+ for (i = 0; i < PER_LOOP_NUM; i++, txdp++, pkts++) {
+ dma_addr = rte_mbuf_data_iova(*pkts);
+ txdp->addr = rte_cpu_to_le_64(dma_addr);
+ txdp->tx.send_size = rte_cpu_to_le_16((*pkts)->data_len);
+ txdp->tx.paylen_fd_dop_ol4cs = 0;
+ txdp->tx.type_cs_vlan_tso_len = 0;
+ txdp->tx.ol_type_vlan_len_msec = 0;
+ txdp->tx.tp_fe_sc_vld_ra_ri = rte_cpu_to_le_16(bd_flag);
+ }
+}
+
+static inline void
+hns3_tx_setup_1bd(struct hns3_desc *txdp, struct rte_mbuf **pkts)
+{
+ const uint16_t bd_flag = BIT(HNS3_TXD_VLD_B) | BIT(HNS3_TXD_FE_B);
+ uint64_t dma_addr;
+
+ dma_addr = rte_mbuf_data_iova(*pkts);
+ txdp->addr = rte_cpu_to_le_64(dma_addr);
+ txdp->tx.send_size = rte_cpu_to_le_16((*pkts)->data_len);
+ txdp->tx.paylen_fd_dop_ol4cs = 0;
+ txdp->tx.type_cs_vlan_tso_len = 0;
+ txdp->tx.ol_type_vlan_len_msec = 0;
+ txdp->tx.tp_fe_sc_vld_ra_ri = rte_cpu_to_le_16(bd_flag);
+}
+
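+ /*
+ * Fill the Tx ring with single-BD packets: the main loop processes four
+ * packets per iteration and the remaining packets are handled one by
+ * one, saving each mbuf in the sw_ring for later release.
+ */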
+static inline void
+hns3_tx_fill_hw_ring(struct hns3_tx_queue *txq,
+ struct rte_mbuf **pkts,
+ uint16_t nb_pkts)
+{
+#define PER_LOOP_NUM 4
+#define PER_LOOP_MASK (PER_LOOP_NUM - 1)
+ struct hns3_desc *txdp = &txq->tx_ring[txq->next_to_use];
+ struct hns3_entry *tx_entry = &txq->sw_ring[txq->next_to_use];
+ const uint32_t mainpart = (nb_pkts & ((uint32_t)~PER_LOOP_MASK));
+ const uint32_t leftover = (nb_pkts & ((uint32_t)PER_LOOP_MASK));
+ uint32_t i;
+
+ for (i = 0; i < mainpart; i += PER_LOOP_NUM) {
+ hns3_tx_backup_4mbuf(tx_entry + i, pkts + i);
+ hns3_tx_setup_4bd(txdp + i, pkts + i);
+
+ /* Increment bytes counter */
+ uint32_t j;
+ for (j = 0; j < PER_LOOP_NUM; j++)
+ txq->basic_stats.bytes += pkts[i + j]->pkt_len;
+ }
+ if (unlikely(leftover > 0)) {
+ for (i = 0; i < leftover; i++) {
+ hns3_tx_backup_1mbuf(tx_entry + mainpart + i,
+ pkts + mainpart + i);
+ hns3_tx_setup_1bd(txdp + mainpart + i,
+ pkts + mainpart + i);
+
+ /* Increment bytes counter */
+ txq->basic_stats.bytes += pkts[mainpart + i]->pkt_len;
+ }
+ }
+}
+
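+ /*
+ * Simple Tx burst: each packet is posted as a single BD with no offload
+ * descriptor fields. The ring is filled in at most two steps so that
+ * wrap-around is handled without a per-packet modulo.
+ */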
+uint16_t
+hns3_xmit_pkts_simple(void *tx_queue,
+ struct rte_mbuf **tx_pkts,
+ uint16_t nb_pkts)
+{
+ struct hns3_tx_queue *txq = tx_queue;
+ uint16_t nb_tx = 0;
+
+ hns3_tx_free_buffer_simple(txq);
+
+ nb_pkts = RTE_MIN(txq->tx_bd_ready, nb_pkts);
+ if (unlikely(nb_pkts == 0)) {
+ if (txq->tx_bd_ready == 0)
+ txq->dfx_stats.queue_full_cnt++;
+ return 0;
+ }
+
+ txq->tx_bd_ready -= nb_pkts;
+ if (txq->next_to_use + nb_pkts > txq->nb_tx_desc) {
+ nb_tx = txq->nb_tx_desc - txq->next_to_use;
+ hns3_tx_fill_hw_ring(txq, tx_pkts, nb_tx);
+ txq->next_to_use = 0;
+ }
+
+ hns3_tx_fill_hw_ring(txq, tx_pkts + nb_tx, nb_pkts - nb_tx);
+ txq->next_to_use += nb_pkts - nb_tx;
+
+ hns3_write_reg_opt(txq->io_tail_reg, nb_pkts);
+
+ return nb_pkts;
+}
+
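+ /*
+ * Full-featured Tx burst supporting multi-segment packets and offloads.
+ */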
+uint16_t
+hns3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+ struct hns3_tx_queue *txq = tx_queue;
+ struct hns3_entry *tx_bak_pkt;
+ struct hns3_desc *tx_ring;
+ struct rte_mbuf *tx_pkt;
+ struct rte_mbuf *m_seg;
+ struct hns3_desc *desc;
+ uint32_t nb_hold = 0;
+ uint16_t tx_next_use;
+ uint16_t tx_pkt_num;
+ uint16_t tx_bd_max;
+ uint16_t nb_buf;
+ uint16_t nb_tx;
+ uint16_t i;
+
+ /* free the mbufs of BDs already sent by hardware */
+ hns3_tx_free_useless_buffer(txq);