+ tmp |= hns3_gen_field_val(HNS3_TXD_OL3T_M, HNS3_TXD_OL3T_S,
+ HNS3_OL3T_IPV6);
+ }
+ /* OL3 header size, defined in units of 4 bytes */
+ tmp |= hns3_gen_field_val(HNS3_TXD_L3LEN_M, HNS3_TXD_L3LEN_S,
+ m->outer_l3_len >> HNS3_L3_LEN_UNIT);
+ *ol_type_vlan_len_msec = tmp;
+}
+
+static int
+hns3_parse_inner_params(struct rte_mbuf *m, uint32_t *ol_type_vlan_len_msec,
+ uint32_t *type_cs_vlan_tso_len)
+{
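+/* NVGRE header length: a 4-byte GRE base header plus a 4-byte key field */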
+#define HNS3_NVGRE_HLEN 8
+ uint32_t tmp_outer = *ol_type_vlan_len_msec;
+ uint32_t tmp_inner = *type_cs_vlan_tso_len;
+ uint64_t ol_flags = m->ol_flags;
+ uint16_t inner_l2_len;
+
+ switch (ol_flags & PKT_TX_TUNNEL_MASK) {
+ case PKT_TX_TUNNEL_VXLAN_GPE:
+ case PKT_TX_TUNNEL_GENEVE:
+ case PKT_TX_TUNNEL_VXLAN:
+ /* MAC-in-UDP tunneling packet, including VXLAN and GENEVE */
+ tmp_outer |= hns3_gen_field_val(HNS3_TXD_TUNTYPE_M,
+ HNS3_TXD_TUNTYPE_S, HNS3_TUN_MAC_IN_UDP);
+ /*
+ * For a tunnel packet, the inner l2 length of the mbuf is the sum of
+ * the outer l4 length, the tunneling header length and the inner l2
+ * length. But in the hns3 tx descriptor, the tunneling header length
+ * is carried in the outer L4 length field. Therefore, the driver
+ * needs to calculate the outer L4 length and the inner L2 length.
+ */
+ tmp_outer |= hns3_gen_field_val(HNS3_TXD_L4LEN_M,
+ HNS3_TXD_L4LEN_S,
+ (uint8_t)RTE_ETHER_VXLAN_HLEN >>
+ HNS3_L4_LEN_UNIT);
+
+ inner_l2_len = m->l2_len - RTE_ETHER_VXLAN_HLEN;
+ break;
+ case PKT_TX_TUNNEL_GRE:
+ tmp_outer |= hns3_gen_field_val(HNS3_TXD_TUNTYPE_M,
+ HNS3_TXD_TUNTYPE_S, HNS3_TUN_NVGRE);
+ /*
+ * For an NVGRE tunnel packet, the outer L4 is empty, so only the
+ * NVGRE header length is filled into the outer L4 length field.
+ */
+ tmp_outer |= hns3_gen_field_val(HNS3_TXD_L4LEN_M,
+ HNS3_TXD_L4LEN_S,
+ (uint8_t)HNS3_NVGRE_HLEN >> HNS3_L4_LEN_UNIT);
+
+ inner_l2_len = m->l2_len - HNS3_NVGRE_HLEN;
+ break;
+ default:
+ /* for tunneling types other than UDP and GRE, drop the packet */
+ return -EINVAL;
+ }
+
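+ /* inner L2 header size, defined in units of 2 bytes */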
+ tmp_inner |= hns3_gen_field_val(HNS3_TXD_L2LEN_M, HNS3_TXD_L2LEN_S,
+ inner_l2_len >> HNS3_L2_LEN_UNIT);
+ /* OL2 header size, defined in units of 2 bytes */
+ tmp_outer |= hns3_gen_field_val(HNS3_TXD_L2LEN_M, HNS3_TXD_L2LEN_S,
+ m->outer_l2_len >> HNS3_L2_LEN_UNIT);
+
+ *type_cs_vlan_tso_len = tmp_inner;
+ *ol_type_vlan_len_msec = tmp_outer;
+
+ return 0;
+}
+
+static int
+hns3_parse_tunneling_params(struct hns3_tx_queue *txq, struct rte_mbuf *m,
+ uint16_t tx_desc_id)
+{
+ struct hns3_desc *tx_ring = txq->tx_ring;
+ struct hns3_desc *desc = &tx_ring[tx_desc_id];
+ uint32_t tmp_outer = 0;
+ uint32_t tmp_inner = 0;
+ int ret;
+
+ /*
+ * In the mbuf, the tunnel header length is counted as part of the
+ * inner L2 header length, but in the hns3 descriptor it belongs to
+ * the outer L4 length field, so the driver has to convert between
+ * the two. To avoid calculating this twice, the L2 header lengths,
+ * both outer and inner, are filled in while parsing tunnel packets.
+ */
+ if (!(m->ol_flags & PKT_TX_TUNNEL_MASK)) {
+ /*
+ * For non-tunnel packets the tunnel type id is 0, so there is no
+ * need to assign it. Only the inner (normal) L2 header length is
+ * assigned.
+ */
+ tmp_inner |= hns3_gen_field_val(HNS3_TXD_L2LEN_M,
+ HNS3_TXD_L2LEN_S, m->l2_len >> HNS3_L2_LEN_UNIT);
+ } else {
+ /*
+ * If the outer checksum is not offloaded, the outer length fields
+ * may be left as 0 and the outer header lengths folded into the
+ * inner l2_len, which would lead to a checksum error. So the
+ * driver has to calculate the header lengths itself.
+ */
+ if (unlikely(!(m->ol_flags & PKT_TX_OUTER_IP_CKSUM) &&
+ m->outer_l2_len == 0)) {
+ struct rte_net_hdr_lens hdr_len;
+ (void)rte_net_get_ptype(m, &hdr_len,
+ RTE_PTYPE_L2_MASK | RTE_PTYPE_L3_MASK);
+ m->outer_l3_len = hdr_len.l3_len;
+ m->outer_l2_len = hdr_len.l2_len;
+ m->l2_len = m->l2_len - hdr_len.l2_len - hdr_len.l3_len;
+ }
+ hns3_parse_outer_params(m, &tmp_outer);
+ ret = hns3_parse_inner_params(m, &tmp_outer, &tmp_inner);
+ if (ret)
+ return -EINVAL;
+ }
+
+ desc->tx.ol_type_vlan_len_msec = rte_cpu_to_le_32(tmp_outer);
+ desc->tx.type_cs_vlan_tso_len = rte_cpu_to_le_32(tmp_inner);
+
+ return 0;
+}
+
+static void
+hns3_parse_l3_cksum_params(struct rte_mbuf *m, uint32_t *type_cs_vlan_tso_len)
+{
+ uint64_t ol_flags = m->ol_flags;
+ uint32_t l3_type;
+ uint32_t tmp;
+
+ tmp = *type_cs_vlan_tso_len;
+ if (ol_flags & PKT_TX_IPV4)
+ l3_type = HNS3_L3T_IPV4;
+ else if (ol_flags & PKT_TX_IPV6)
+ l3_type = HNS3_L3T_IPV6;
+ else
+ l3_type = HNS3_L3T_NONE;
+
+ /* inner (or normal) L3 header size, defined in units of 4 bytes */
+ tmp |= hns3_gen_field_val(HNS3_TXD_L3LEN_M, HNS3_TXD_L3LEN_S,
+ m->l3_len >> HNS3_L3_LEN_UNIT);
+
+ tmp |= hns3_gen_field_val(HNS3_TXD_L3T_M, HNS3_TXD_L3T_S, l3_type);
+
+ /* Enable L3 checksum offloads */
+ if (ol_flags & PKT_TX_IP_CKSUM)
+ tmp |= BIT(HNS3_TXD_L3CS_B);
+ *type_cs_vlan_tso_len = tmp;
+}
+
+static void
+hns3_parse_l4_cksum_params(struct rte_mbuf *m, uint32_t *type_cs_vlan_tso_len)
+{
+ uint64_t ol_flags = m->ol_flags;
+ uint32_t tmp;
+ /* Enable L4 checksum offloads */
+ switch (ol_flags & (PKT_TX_L4_MASK | PKT_TX_TCP_SEG)) {
+ case PKT_TX_TCP_CKSUM:
+ case PKT_TX_TCP_SEG:
+ tmp = *type_cs_vlan_tso_len;
+ tmp |= hns3_gen_field_val(HNS3_TXD_L4T_M, HNS3_TXD_L4T_S,
+ HNS3_L4T_TCP);
+ break;
+ case PKT_TX_UDP_CKSUM:
+ tmp = *type_cs_vlan_tso_len;
+ tmp |= hns3_gen_field_val(HNS3_TXD_L4T_M, HNS3_TXD_L4T_S,
+ HNS3_L4T_UDP);
+ break;
+ case PKT_TX_SCTP_CKSUM:
+ tmp = *type_cs_vlan_tso_len;
+ tmp |= hns3_gen_field_val(HNS3_TXD_L4T_M, HNS3_TXD_L4T_S,
+ HNS3_L4T_SCTP);
+ break;
+ default:
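+ /* no L4 checksum offload requested, leave the field unchanged */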
+ return;
+ }
+ tmp |= BIT(HNS3_TXD_L4CS_B);
+ tmp |= hns3_gen_field_val(HNS3_TXD_L4LEN_M, HNS3_TXD_L4LEN_S,
+ m->l4_len >> HNS3_L4_LEN_UNIT);
+ *type_cs_vlan_tso_len = tmp;
+}
+
+static void
+hns3_txd_enable_checksum(struct hns3_tx_queue *txq, struct rte_mbuf *m,
+ uint16_t tx_desc_id)
+{
+ struct hns3_desc *tx_ring = txq->tx_ring;
+ struct hns3_desc *desc = &tx_ring[tx_desc_id];
+ uint32_t value = 0;
+
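+ /* build the L3 and L4 checksum fields, then merge them into the BD */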
+ hns3_parse_l3_cksum_params(m, &value);
+ hns3_parse_l4_cksum_params(m, &value);
+
+ desc->tx.type_cs_vlan_tso_len |= rte_cpu_to_le_32(value);
+}
+
+static bool
+hns3_pkt_need_linearized(struct rte_mbuf *tx_pkts, uint32_t bd_num,
+ uint32_t max_non_tso_bd_num)
+{
+ struct rte_mbuf *m_first = tx_pkts;
+ struct rte_mbuf *m_last = tx_pkts;
+ uint32_t tot_len = 0;
+ uint32_t hdr_len;
+ uint32_t i;
+
+ /*
+ * In the hns3 network engine, hardware requires that the sum of the
+ * data lengths of every 8 consecutive buffers is greater than MSS.
+ * We simplify this by ensuring that the sum of the first 8
+ * consecutive frags is greater than the GSO header length plus MSS,
+ * and that every run of 7 consecutive frags after that is greater
+ * than MSS, except for the last 7 frags.
+ */
+ if (bd_num <= max_non_tso_bd_num)
+ return false;
+
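+ /* sum the data length of the first (max_non_tso_bd_num - 1) segments */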
+ for (i = 0; m_last && i < max_non_tso_bd_num - 1;
+ i++, m_last = m_last->next)
+ tot_len += m_last->data_len;
+
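+ /* the chain has fewer segments than bd_num indicates, linearize to be safe */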
+ if (!m_last)
+ return true;
+
+ /* ensure the sum of the first 8 frags is greater than mss + header */
+ hdr_len = tx_pkts->l2_len + tx_pkts->l3_len + tx_pkts->l4_len;
+ hdr_len += (tx_pkts->ol_flags & PKT_TX_TUNNEL_MASK) ?
+ tx_pkts->outer_l2_len + tx_pkts->outer_l3_len : 0;
+ if (tot_len + m_last->data_len < tx_pkts->tso_segsz + hdr_len)
+ return true;
+
+ /*
+ * ensure the sum of the data lengths of every 7 consecutive
+ * buffers is greater than mss, except for the last one.
+ */
+ for (i = 0; m_last && i < bd_num - max_non_tso_bd_num; i++) {
+ tot_len -= m_first->data_len;
+ tot_len += m_last->data_len;
+
+ if (tot_len < tx_pkts->tso_segsz)
+ return true;
+
+ m_first = m_first->next;
+ m_last = m_last->next;
+ }
+
+ return false;
+}
+
+static void
+hns3_outer_header_cksum_prepare(struct rte_mbuf *m)
+{
+ uint64_t ol_flags = m->ol_flags;
+ uint32_t paylen, hdr_len, l4_proto;
+
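+ /* nothing to prepare unless the packet carries outer IP offload flags */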
+ if (!(ol_flags & (PKT_TX_OUTER_IPV4 | PKT_TX_OUTER_IPV6)))
+ return;
+
+ if (ol_flags & PKT_TX_OUTER_IPV4) {
+ struct rte_ipv4_hdr *ipv4_hdr;
+ ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *,
+ m->outer_l2_len);
+ l4_proto = ipv4_hdr->next_proto_id;
+ if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
+ ipv4_hdr->hdr_checksum = 0;
+ } else {
+ struct rte_ipv6_hdr *ipv6_hdr;
+ ipv6_hdr = rte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *,
+ m->outer_l2_len);
+ l4_proto = ipv6_hdr->proto;
+ }
+ /* the driver should ensure the outer UDP checksum is 0 for tunnel TSO */
+ if (l4_proto == IPPROTO_UDP && (ol_flags & PKT_TX_TCP_SEG)) {
+ struct rte_udp_hdr *udp_hdr;
+ hdr_len = m->l2_len + m->l3_len + m->l4_len;
+ hdr_len += m->outer_l2_len + m->outer_l3_len;
+ paylen = m->pkt_len - hdr_len;
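+ /* TSO will not split the packet when the payload fits into one segment */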
+ if (paylen <= m->tso_segsz)
+ return;
+ udp_hdr = rte_pktmbuf_mtod_offset(m, struct rte_udp_hdr *,
+ m->outer_l2_len +
+ m->outer_l3_len);
+ udp_hdr->dgram_cksum = 0;
+ }
+}
+
+static int
+hns3_check_tso_pkt_valid(struct rte_mbuf *m)
+{
+ uint32_t tmp_data_len_sum = 0;
+ uint16_t nb_buf = m->nb_segs;
+ uint32_t paylen, hdr_len;
+ struct rte_mbuf *m_seg;
+ int i;
+
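+ /* the segment count of a TSO packet is bounded by the hardware BD limit */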
+ if (nb_buf > HNS3_MAX_TSO_BD_PER_PKT)
+ return -EINVAL;
+
+ hdr_len = m->l2_len + m->l3_len + m->l4_len;
+ hdr_len += (m->ol_flags & PKT_TX_TUNNEL_MASK) ?
+ m->outer_l2_len + m->outer_l3_len : 0;
+ if (hdr_len > HNS3_MAX_TSO_HDR_SIZE)
+ return -EINVAL;
+
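+ /* the TSO payload must not exceed the maximum BD payload length */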
+ paylen = m->pkt_len - hdr_len;
+ if (paylen > HNS3_MAX_BD_PAYLEN)
+ return -EINVAL;
+
+ /*
+ * In the hns3 network engine, the TSO header (including the outer
+ * and inner L2, L3 and L4 headers) must be provided by at most
+ * three descriptors.
+ */
+ m_seg = m;
+ for (i = 0; m_seg != NULL && i < HNS3_MAX_TSO_HDR_BD_NUM && i < nb_buf;
+ i++, m_seg = m_seg->next) {
+ tmp_data_len_sum += m_seg->data_len;
+ }
+
+ if (hdr_len > tmp_data_len_sum)
+ return -EINVAL;
+
+ return 0;
+}
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+static inline int
+hns3_vld_vlan_chk(struct hns3_tx_queue *txq, struct rte_mbuf *m)
+{
+ struct rte_ether_hdr *eh;
+ struct rte_vlan_hdr *vh;
+
+ if (!txq->pvid_sw_shift_en)
+ return 0;
+
+ /*
+ * Due to hardware limitations, the hns3 network engine only supports
+ * two-layer VLAN hardware offload in the Tx direction, so QinQ
+ * insertion is not supported when PVID is enabled. Furthermore, when
+ * PVID is enabled, the following two kinds of packets:
+ * i) packets with more than two VLAN tags.
+ * ii) packets with one VLAN tag while hardware VLAN insertion is
+ * enabled.
+ * will be regarded as abnormal packets and discarded by the hardware
+ * in the Tx direction. For debugging purposes, a validation check for
+ * such packets is added to the '.tx_pkt_prepare' ops implementation
+ * function named hns3_prep_pkts to inform users that these packets
+ * will be discarded.
+ */
+ if (m->ol_flags & PKT_TX_QINQ_PKT)
+ return -EINVAL;
+
+ eh = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
+ if (eh->ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN)) {
+ if (m->ol_flags & PKT_TX_VLAN_PKT)
+ return -EINVAL;
+
+ /* Ensure the incoming packet is not a QinQ packet */
+ vh = (struct rte_vlan_hdr *)(eh + 1);
+ if (vh->eth_proto == rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN))
+ return -EINVAL;
+ }
+
+ return 0;
+}
+#endif
+
+static int
+hns3_prep_pkt_proc(struct hns3_tx_queue *tx_queue, struct rte_mbuf *m)
+{
+ int ret;
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+ ret = rte_validate_tx_offload(m);
+ if (ret != 0) {
+ rte_errno = -ret;
+ return ret;
+ }
+
+ ret = hns3_vld_vlan_chk(tx_queue, m);
+ if (ret != 0) {
+ rte_errno = EINVAL;
+ return ret;
+ }
+#endif
+ if (hns3_pkt_is_tso(m)) {
+ if (hns3_pkt_need_linearized(m, m->nb_segs,
+ tx_queue->max_non_tso_bd_num) ||
+ hns3_check_tso_pkt_valid(m)) {
+ rte_errno = EINVAL;
+ return -EINVAL;
+ }
+
+ if (tx_queue->tso_mode != HNS3_TSO_SW_CAL_PSEUDO_H_CSUM) {
+ /*
+ * (tso mode != HNS3_TSO_SW_CAL_PSEUDO_H_CSUM) means that the
+ * hardware can recalculate the TCP pseudo-header checksum of
+ * packets that need TSO, so the network driver software does
+ * not need to recalculate it.
+ */
+ hns3_outer_header_cksum_prepare(m);
+ return 0;
+ }
+ }
+
+ ret = rte_net_intel_cksum_prepare(m);
+ if (ret != 0) {
+ rte_errno = -ret;
+ return ret;
+ }
+
+ hns3_outer_header_cksum_prepare(m);
+
+ return 0;
+}
+
+uint16_t
+hns3_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
+ uint16_t nb_pkts)
+{
+ struct rte_mbuf *m;
+ uint16_t i;
+
+ for (i = 0; i < nb_pkts; i++) {
+ m = tx_pkts[i];
+ if (hns3_prep_pkt_proc(tx_queue, m))
+ return i;