diff --git a/drivers/net/hns3/hns3_rxtx.h b/drivers/net/hns3/hns3_rxtx.h
index c1a34e2f69..4be9c4a476 100644
--- a/drivers/net/hns3/hns3_rxtx.h
+++ b/drivers/net/hns3/hns3_rxtx.h
@@ -10,7 +10,28 @@
 #define HNS3_DEFAULT_RING_DESC 1024
 #define HNS3_ALIGN_RING_DESC 32
 #define HNS3_RING_BASE_ALIGN 128
+#define HNS3_BULK_ALLOC_MBUF_NUM 32
+
 #define HNS3_DEFAULT_RX_FREE_THRESH 32
+#define HNS3_DEFAULT_TX_FREE_THRESH 32
+#define HNS3_DEFAULT_TX_RS_THRESH 32
+#define HNS3_TX_FAST_FREE_AHEAD 64
+
+#define HNS3_DEFAULT_RX_BURST 32
+#if (HNS3_DEFAULT_RX_BURST > 64)
+#error "PMD HNS3: HNS3_DEFAULT_RX_BURST must <= 64\n"
+#endif
+#define HNS3_DEFAULT_DESCS_PER_LOOP 4
+#define HNS3_SVE_DEFAULT_DESCS_PER_LOOP 8
+#if (HNS3_DEFAULT_DESCS_PER_LOOP > HNS3_SVE_DEFAULT_DESCS_PER_LOOP)
+#define HNS3_VECTOR_RX_OFFSET_TABLE_LEN HNS3_DEFAULT_DESCS_PER_LOOP
+#else
+#define HNS3_VECTOR_RX_OFFSET_TABLE_LEN HNS3_SVE_DEFAULT_DESCS_PER_LOOP
+#endif
+#define HNS3_DEFAULT_RXQ_REARM_THRESH 64
+#define HNS3_UINT8_BIT 8
+#define HNS3_UINT16_BIT 16
+#define HNS3_UINT32_BIT 32
 
 #define HNS3_512_BD_BUF_SIZE 512
 #define HNS3_1K_BD_BUF_SIZE 1024
@@ -46,7 +67,7 @@
 #define HNS3_RXD_L2E_B 16
 #define HNS3_RXD_L3E_B 17
 #define HNS3_RXD_L4E_B 18
-#define HNS3_RXD_TRUNCAT_B 19
+#define HNS3_RXD_TRUNCATE_B 19
 #define HNS3_RXD_HOI_B 20
 #define HNS3_RXD_DOI_B 21
 #define HNS3_RXD_OL3E_B 22
@@ -127,6 +148,13 @@
 #define HNS3_L3_LEN_UNIT 2UL
 #define HNS3_L4_LEN_UNIT 2UL
 
+#define HNS3_TXD_DEFAULT_BDTYPE		0
+#define HNS3_TXD_VLD_CMD		(0x1 << HNS3_TXD_VLD_B)
+#define HNS3_TXD_FE_CMD			(0x1 << HNS3_TXD_FE_B)
+#define HNS3_TXD_DEFAULT_VLD_FE_BDTYPE \
+		(HNS3_TXD_VLD_CMD | HNS3_TXD_FE_CMD | HNS3_TXD_DEFAULT_BDTYPE)
+#define HNS3_TXD_SEND_SIZE_SHIFT	16
+
 enum hns3_pkt_l2t_type {
 	HNS3_L2_TYPE_UNICAST,
 	HNS3_L2_TYPE_MULTICAST,
@@ -220,7 +248,13 @@ struct hns3_desc {
 					uint16_t ot_vlan_tag;
 				};
 			};
-			uint32_t bd_base_info;
+			union {
+				uint32_t bd_base_info;
+				struct {
+					uint16_t bdtype_vld_udp0;
+					uint16_t fe_lum_crcp_l3l4p;
+				};
+			};
 		} rx;
 	};
 } __rte_packed;
@@ -233,49 +267,67 @@ struct hns3_rx_queue {
 	void *io_base;
 	volatile void *io_head_reg;
 	struct hns3_adapter *hns;
+	struct hns3_ptype_table *ptype_tbl;
 	struct rte_mempool *mb_pool;
 	struct hns3_desc *rx_ring;
 	uint64_t rx_ring_phys_addr; /* RX ring DMA address */
 	const struct rte_memzone *mz;
 	struct hns3_entry *sw_ring;
-
 	struct rte_mbuf *pkt_first_seg;
 	struct rte_mbuf *pkt_last_seg;
 	uint16_t queue_id;
 	uint16_t port_id;
 	uint16_t nb_rx_desc;
-	uint16_t next_to_use;
 	uint16_t rx_buf_len;
 	/*
 	 * threshold for the number of BDs waited to passed to hardware. If the
 	 * number exceeds the threshold, driver will pass these BDs to hardware.
 	 */
 	uint16_t rx_free_thresh;
+	uint16_t next_to_use;    /* index of next BD to be polled */
 	uint16_t rx_free_hold;   /* num of BDs waited to passed to hardware */
-
-	/*
-	 * port based vlan configuration state.
-	 * value range: HNS3_PORT_BASE_VLAN_DISABLE / HNS3_PORT_BASE_VLAN_ENABLE
-	 */
-	uint16_t pvid_state;
+	uint16_t rx_rearm_start; /* index of the BD from which re-arming starts */
+	uint16_t rx_rearm_nb;    /* number of remaining BDs to be re-armed */
 
 	/* 4 if DEV_RX_OFFLOAD_KEEP_CRC offload set, 0 otherwise */
 	uint8_t crc_len;
 
 	bool rx_deferred_start; /* don't start this queue in dev start */
 	bool configured;        /* indicate if rx queue has been configured */
+	/*
+	 * Indicate whether to ignore the outer VLAN field in the Rx BD
+	 * reported by the hardware. On some versions of the hardware network
+	 * engine whose VLAN mode is HNS3_SW_SHIFT_AND_DISCARD_MODE, such as
+	 * Kunpeng 920, the outer VLAN is the PVID when the PVID is set, and
+	 * this VLAN must not be passed to the upper-layer application. On
+	 * hardware network engines whose VLAN mode is
+	 * HNS3_HW_SHIFT_AND_DISCARD_MODE, such as Kunpeng 930, the PVID is
+	 * not reported in the BDs at all, so the PMD does not need to perform
+	 * any PVID-related operation in Rx; in that case pvid_sw_discard_en
+	 * will be false.
+	 */
+	bool pvid_sw_discard_en;
+	bool enabled;           /* indicate if Rx queue has been enabled */
 
 	uint64_t l2_errors;
 	uint64_t pkt_len_errors;
-	uint64_t l3_csum_erros;
-	uint64_t l4_csum_erros;
-	uint64_t ol3_csum_erros;
-	uint64_t ol4_csum_erros;
+	uint64_t l3_csum_errors;
+	uint64_t l4_csum_errors;
+	uint64_t ol3_csum_errors;
+	uint64_t ol4_csum_errors;
+
+	/* bulk-allocation mbuf cache (see the sketch after this struct) */
+	struct rte_mbuf *bulk_mbuf[HNS3_BULK_ALLOC_MBUF_NUM];
+	uint16_t bulk_mbuf_num;
+
+	/* offset_table: used by the vector Rx path to solve the execution
+	 * re-ordering problem */
+	uint8_t offset_table[HNS3_VECTOR_RX_OFFSET_TABLE_LEN + 1];
+	uint64_t mbuf_initializer; /* value to init mbufs used with vector rx */
+	struct rte_mbuf fake_mbuf; /* fake mbuf used with vector rx */
 };
 
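The bulk_mbuf/bulk_mbuf_num pair above is a small software cache: mbufs are taken from the mempool HNS3_BULK_ALLOC_MBUF_NUM at a time, amortizing the mempool access cost over many Rx descriptors. A minimal sketch of that scheme follows; the helper name is hypothetical and the driver's real allocation path may differ in details:

/* Minimal sketch of the bulk_mbuf cache (hypothetical helper): refill the
 * cache from the mempool in one bulk operation, then hand mbufs out one by
 * one until the cache is empty again. */
static inline struct rte_mbuf *
hns3_rx_alloc_one_sketch(struct hns3_rx_queue *rxq)
{
	if (likely(rxq->bulk_mbuf_num > 0))
		return rxq->bulk_mbuf[--rxq->bulk_mbuf_num];

	if (unlikely(rte_mempool_get_bulk(rxq->mb_pool,
					  (void **)rxq->bulk_mbuf,
					  HNS3_BULK_ALLOC_MBUF_NUM) != 0))
		return NULL; /* mempool temporarily exhausted */

	rxq->bulk_mbuf_num = HNS3_BULK_ALLOC_MBUF_NUM;
	return rxq->bulk_mbuf[--rxq->bulk_mbuf_num];
}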
 struct hns3_tx_queue {
 	void *io_base;
+	volatile void *io_tail_reg;
 	struct hns3_adapter *hns;
 	struct hns3_desc *tx_ring;
 	uint64_t tx_ring_phys_addr; /* TX ring DMA address */
@@ -285,24 +337,76 @@ struct hns3_tx_queue {
 	uint16_t queue_id;
 	uint16_t port_id;
 	uint16_t nb_tx_desc;
+	/*
+	 * index of the next BD whose corresponding rte_mbuf can be released
+	 * by the driver.
+	 */
 	uint16_t next_to_clean;
+	/* index of the next BD to be filled by the driver to send a packet */
 	uint16_t next_to_use;
+	/* number of remaining BDs ready to be filled by the driver */
 	uint16_t tx_bd_ready;
+	/* free Tx buffers when the number of available BDs drops below this */
+	uint16_t tx_free_thresh;
 
 	/*
-	 * port based vlan configuration state.
-	 * value range: HNS3_PORT_BASE_VLAN_DISABLE / HNS3_PORT_BASE_VLAN_ENABLE
+	 * For better performance in the Tx datapath, mbufs are released in
+	 * batches (see the sketch following this struct).
+	 * Checking only the VLD bit of the last descriptor in a batch of
+	 * tx_rs_thresh descriptors does not guarantee that all of them have
+	 * been sent by hardware successfully, so the VLD bits of all
+	 * descriptors must be checked to be cleared before freeing all mbufs
+	 * in the batch.
+	 * - tx_rs_thresh
+	 *   Number of mbufs released at a time.
+	 *
+	 * - free
+	 *   Tx mbuf free array used to temporarily hold the addresses of
+	 *   mbufs released back to the mempool when freeing in batches.
 	 */
-	uint16_t pvid_state;
+	uint16_t tx_rs_thresh;
+	struct rte_mbuf **free;
+
+	/*
+	 * TSO mode.
+	 * value range:
+	 *      HNS3_TSO_SW_CAL_PSEUDO_H_CSUM / HNS3_TSO_HW_CAL_PSEUDO_H_CSUM
+	 *
+	 *  - HNS3_TSO_SW_CAL_PSEUDO_H_CSUM
+	 *     In this mode, because of a hardware constraint, the driver
+	 *     software must erase the L4 length value from the TCP pseudo
+	 *     header and recalculate the TCP pseudo-header checksum of
+	 *     packets that need TSO (see the sketch following this struct).
+	 *
+	 *  - HNS3_TSO_HW_CAL_PSEUDO_H_CSUM
+	 *     In this mode, the hardware can recalculate the TCP
+	 *     pseudo-header checksum of packets that need TSO, so the
+	 *     driver software does not need to recalculate it.
+	 */
+	uint8_t tso_mode;
 	/*
 	 * The minimum length of the packet supported by hardware in the Tx
 	 * direction.
 	 */
 	uint32_t min_tx_pkt_len;
+	uint8_t max_non_tso_bd_num; /* max BD number of one non-TSO packet */
 	bool tx_deferred_start; /* don't start this queue in dev start */
 	bool configured;        /* indicate if tx queue has been configured */
+	/*
+	 * Indicate whether to add the vlan_tci of the mbuf to the inner VLAN
+	 * field of the Tx BD. On some versions of the hardware network engine
+	 * whose VLAN mode is HNS3_SW_SHIFT_AND_DISCARD_MODE, such as
+	 * Kunpeng 920, the outer VLAN is always the PVID when the PVID is
+	 * set, and the PVID would overwrite the outer VLAN field of the Tx
+	 * BD. On hardware network engines whose VLAN mode is
+	 * HNS3_HW_SHIFT_AND_DISCARD_MODE, such as Kunpeng 930, the hardware
+	 * shifts the VLAN field automatically when the PVID is set, so the
+	 * PMD does not need to do PVID-related operations in Tx; in that
+	 * case pvid_sw_shift_en will be false.
+	 */
+	bool pvid_sw_shift_en;
+	bool enabled;           /* indicate if Tx queue has been enabled */
 
 	/*
 	 * The following items are used for the abnormal errors statistics in
@@ -354,6 +458,9 @@ struct hns3_tx_queue {
 	uint64_t pkt_padding_fail_cnt;
 };
 
+#define HNS3_GET_TX_QUEUE_PEND_BD_NUM(txq) \
+		((txq)->nb_tx_desc - 1 - (txq)->tx_bd_ready)
+
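The batch-release policy documented in struct hns3_tx_queue above can be sketched as follows. This is an illustration only, not the driver's implementation: it assumes struct hns3_entry holds the mbuf pointer for its BD and that the Tx descriptor exposes a tp_fe_sc_vld_ra_ri word carrying the VLD bit (neither is shown in this hunk), it ignores ring wrap-around, and it assumes all mbufs of the batch come from one mempool:

/* Sketch of batched mbuf release: the batch may be freed only once
 * hardware has cleared VLD in every one of the tx_rs_thresh descriptors. */
static void
tx_free_batch_sketch(struct hns3_tx_queue *txq)
{
	struct hns3_desc *desc = &txq->tx_ring[txq->next_to_clean];
	struct hns3_entry *entry = &txq->sw_ring[txq->next_to_clean];
	uint16_t i;

	for (i = 0; i < txq->tx_rs_thresh; i++)
		if (desc[i].tx.tp_fe_sc_vld_ra_ri &
		    rte_cpu_to_le_16(BIT(HNS3_TXD_VLD_B)))
			return; /* at least one BD still owned by hardware */

	for (i = 0; i < txq->tx_rs_thresh; i++) {
		txq->free[i] = entry[i].mbuf;
		entry[i].mbuf = NULL;
	}

	/* one bulk operation instead of tx_rs_thresh individual frees */
	rte_mempool_put_bulk(txq->free[0]->pool, (void **)txq->free,
			     txq->tx_rs_thresh);

	txq->next_to_clean = (txq->next_to_clean + txq->tx_rs_thresh) %
			     txq->nb_tx_desc;
	txq->tx_bd_ready += txq->tx_rs_thresh;
}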
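For the HNS3_TSO_SW_CAL_PSEUDO_H_CSUM mode described above, the required fix-up amounts to recomputing a TCP pseudo-header checksum that excludes the L4 length. A minimal sketch for the IPv4/TCP case, using standard DPDK APIs (hypothetical helper, not the driver's actual code; it assumes m->l2_len and m->l3_len were set by the application):

#include <rte_ip.h>
#include <rte_mbuf.h>
#include <rte_tcp.h>

/* Hypothetical helper illustrating HNS3_TSO_SW_CAL_PSEUDO_H_CSUM: before
 * handing a TSO packet to hardware, recompute the TCP pseudo-header
 * checksum without the L4 length. */
static void
sw_tso_fix_pseudo_hdr_csum(struct rte_mbuf *m)
{
	struct rte_ipv4_hdr *ipv4_hdr;
	struct rte_tcp_hdr *tcp_hdr;

	ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *,
					   m->l2_len);
	tcp_hdr = rte_pktmbuf_mtod_offset(m, struct rte_tcp_hdr *,
					  m->l2_len + m->l3_len);

	/* When ol_flags contains PKT_TX_TCP_SEG, rte_ipv4_phdr_cksum()
	 * leaves the segment length out of the pseudo-header checksum,
	 * which is exactly the "erase the L4 len value" fix-up described
	 * in the comment above. */
	tcp_hdr->cksum = rte_ipv4_phdr_cksum(ipv4_hdr, m->ol_flags);
}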
 struct hns3_queue_info {
 	const char *type;	/* point to queue memory name */
 	const char *ring_name;	/* point to hardware ring name */
@@ -380,29 +487,170 @@ enum hns3_cksum_status {
 	HNS3_OUTER_L4_CKSUM_ERR = 8
 };
 
+static inline int
+hns3_handle_bdinfo(struct hns3_rx_queue *rxq, struct rte_mbuf *rxm,
+		   uint32_t bd_base_info, uint32_t l234_info,
+		   uint32_t *cksum_err)
+{
+#define L2E_TRUNC_ERR_FLAG	(BIT(HNS3_RXD_L2E_B) | \
+				 BIT(HNS3_RXD_TRUNCATE_B))
+#define CHECKSUM_ERR_FLAG	(BIT(HNS3_RXD_L3E_B) | \
+				 BIT(HNS3_RXD_L4E_B) | \
+				 BIT(HNS3_RXD_OL3E_B) | \
+				 BIT(HNS3_RXD_OL4E_B))
+
+	uint32_t tmp = 0;
+
+	/*
+	 * If the packet length is bigger than the MTU when receiving with
+	 * the non-scattered algorithm, the first n BDs arrive without the
+	 * FE bit set, and that situation must be handled here.
+	 * Note: no statistics counter needs to be added for this case,
+	 * because the last BD, which does carry the FE bit, will have the
+	 * HNS3_RXD_L2E_B bit set.
+	 */
+	if (unlikely((bd_base_info & BIT(HNS3_RXD_FE_B)) == 0))
+		return -EINVAL;
+
+	if (unlikely((l234_info & L2E_TRUNC_ERR_FLAG) || rxm->pkt_len == 0)) {
+		if (l234_info & BIT(HNS3_RXD_L2E_B))
+			rxq->l2_errors++;
+		else
+			rxq->pkt_len_errors++;
+		return -EINVAL;
+	}
+
+	if (bd_base_info & BIT(HNS3_RXD_L3L4P_B)) {
+		if (likely((l234_info & CHECKSUM_ERR_FLAG) == 0)) {
+			*cksum_err = 0;
+			return 0;
+		}
+
+		if (unlikely(l234_info & BIT(HNS3_RXD_L3E_B))) {
+			rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD;
+			rxq->l3_csum_errors++;
+			tmp |= HNS3_L3_CKSUM_ERR;
+		}
+
+		if (unlikely(l234_info & BIT(HNS3_RXD_L4E_B))) {
+			rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD;
+			rxq->l4_csum_errors++;
+			tmp |= HNS3_L4_CKSUM_ERR;
+		}
+
+		if (unlikely(l234_info & BIT(HNS3_RXD_OL3E_B))) {
+			rxq->ol3_csum_errors++;
+			tmp |= HNS3_OUTER_L3_CKSUM_ERR;
+		}
+
+		if (unlikely(l234_info & BIT(HNS3_RXD_OL4E_B))) {
+			rxm->ol_flags |= PKT_RX_OUTER_L4_CKSUM_BAD;
+			rxq->ol4_csum_errors++;
+			tmp |= HNS3_OUTER_L4_CKSUM_ERR;
+		}
+	}
+	*cksum_err = tmp;
+
+	return 0;
+}
+
+static inline void
+hns3_rx_set_cksum_flag(struct rte_mbuf *rxm, const uint64_t packet_type,
+		       const uint32_t cksum_err)
+{
+	if (unlikely((packet_type & RTE_PTYPE_TUNNEL_MASK))) {
+		if (likely(packet_type & RTE_PTYPE_INNER_L3_MASK) &&
+		    (cksum_err & HNS3_L3_CKSUM_ERR) == 0)
+			rxm->ol_flags |= PKT_RX_IP_CKSUM_GOOD;
+		if (likely(packet_type & RTE_PTYPE_INNER_L4_MASK) &&
+		    (cksum_err & HNS3_L4_CKSUM_ERR) == 0)
+			rxm->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
+		if (likely(packet_type & RTE_PTYPE_L4_MASK) &&
+		    (cksum_err & HNS3_OUTER_L4_CKSUM_ERR) == 0)
+			rxm->ol_flags |= PKT_RX_OUTER_L4_CKSUM_GOOD;
+	} else {
+		if (likely(packet_type & RTE_PTYPE_L3_MASK) &&
+		    (cksum_err & HNS3_L3_CKSUM_ERR) == 0)
+			rxm->ol_flags |= PKT_RX_IP_CKSUM_GOOD;
+		if (likely(packet_type & RTE_PTYPE_L4_MASK) &&
+		    (cksum_err & HNS3_L4_CKSUM_ERR) == 0)
+			rxm->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
+	}
+}
+
+static inline uint32_t
+hns3_rx_calc_ptype(struct hns3_rx_queue *rxq, const uint32_t l234_info,
+		   const uint32_t ol_info)
+{
+	const struct hns3_ptype_table *const ptype_tbl = rxq->ptype_tbl;
+	uint32_t l2id, l3id, l4id;
+	uint32_t ol3id, ol4id;
+
+	ol4id = hns3_get_field(ol_info, HNS3_RXD_OL4ID_M, HNS3_RXD_OL4ID_S);
+	ol3id = hns3_get_field(ol_info, HNS3_RXD_OL3ID_M, HNS3_RXD_OL3ID_S);
+	l2id = hns3_get_field(l234_info, HNS3_RXD_STRP_TAGP_M,
+			      HNS3_RXD_STRP_TAGP_S);
+	l3id = hns3_get_field(l234_info, HNS3_RXD_L3ID_M, HNS3_RXD_L3ID_S);
+	l4id = hns3_get_field(l234_info, HNS3_RXD_L4ID_M, HNS3_RXD_L4ID_S);
+
+	if (unlikely(ptype_tbl->ol4table[ol4id]))
+		return ptype_tbl->inner_l2table[l2id] |
+			ptype_tbl->inner_l3table[l3id] |
+			ptype_tbl->inner_l4table[l4id] |
+			ptype_tbl->ol3table[ol3id] | ptype_tbl->ol4table[ol4id];
+	else
+		return ptype_tbl->l2table[l2id] | ptype_tbl->l3table[l3id] |
+			ptype_tbl->l4table[l4id];
+}
+
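Taken together, these three inline helpers cover the per-descriptor tail of the Rx path: validate the BD and accumulate checksum errors, resolve the packet type, then translate the checksum status into mbuf offload flags. A rough sketch of how a burst routine strings them together (illustrative only; the helper name is hypothetical, and descriptor parsing and ring refill are omitted):

/* Illustrative per-descriptor flow in an Rx burst function; assumes it
 * lives in a file that includes this header, and that bd_base_info,
 * l234_info and ol_info were already read from the completed BD. */
static inline void
rx_finish_one_sketch(struct hns3_rx_queue *rxq, struct rte_mbuf *rxm,
		     uint32_t bd_base_info, uint32_t l234_info,
		     uint32_t ol_info)
{
	uint32_t cksum_err;

	if (unlikely(hns3_handle_bdinfo(rxq, rxm, bd_base_info, l234_info,
					&cksum_err) != 0)) {
		rte_pktmbuf_free(rxm); /* BD reported an error; drop it */
		return;
	}

	rxm->packet_type = hns3_rx_calc_ptype(rxq, l234_info, ol_info);
	if (bd_base_info & BIT(HNS3_RXD_L3L4P_B))
		hns3_rx_set_cksum_flag(rxm, rxm->packet_type, cksum_err);
}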
 void hns3_dev_rx_queue_release(void *queue);
 void hns3_dev_tx_queue_release(void *queue);
 void hns3_free_all_queues(struct rte_eth_dev *dev);
-int hns3_reset_all_queues(struct hns3_adapter *hns);
+int hns3_reset_all_tqps(struct hns3_adapter *hns);
 void hns3_dev_all_rx_queue_intr_enable(struct hns3_hw *hw, bool en);
 int hns3_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id);
 int hns3_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id);
 void hns3_enable_all_queues(struct hns3_hw *hw, bool en);
-int hns3_start_queues(struct hns3_adapter *hns, bool reset_queue);
-int hns3_stop_queues(struct hns3_adapter *hns, bool reset_queue);
+int hns3_init_queues(struct hns3_adapter *hns, bool reset_queue);
+void hns3_start_tqps(struct hns3_hw *hw);
+void hns3_stop_tqps(struct hns3_hw *hw);
+int hns3_rxq_iterate(struct rte_eth_dev *dev,
+		 int (*callback)(struct hns3_rx_queue *, void *), void *arg);
 void hns3_dev_release_mbufs(struct hns3_adapter *hns);
 int hns3_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t nb_desc,
 			unsigned int socket, const struct rte_eth_rxconf *conf,
 			struct rte_mempool *mp);
 int hns3_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t nb_desc,
 			unsigned int socket, const struct rte_eth_txconf *conf);
+int hns3_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id);
+int hns3_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id);
+int hns3_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id);
+int hns3_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id);
 uint16_t hns3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 			uint16_t nb_pkts);
+uint16_t hns3_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
+				  uint16_t nb_pkts);
+uint16_t hns3_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
+			    uint16_t nb_pkts);
+uint16_t hns3_recv_pkts_vec_sve(void *rx_queue, struct rte_mbuf **rx_pkts,
+				uint16_t nb_pkts);
+int hns3_rx_burst_mode_get(struct rte_eth_dev *dev,
+			   __rte_unused uint16_t queue_id,
+			   struct rte_eth_burst_mode *mode);
+int hns3_rx_check_vec_support(struct rte_eth_dev *dev);
 uint16_t hns3_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
 			uint16_t nb_pkts);
+uint16_t hns3_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
+			       uint16_t nb_pkts);
 uint16_t hns3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
+uint16_t hns3_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
+			    uint16_t nb_pkts);
+uint16_t hns3_xmit_pkts_vec_sve(void *tx_queue, struct rte_mbuf **tx_pkts,
+				uint16_t nb_pkts);
+int hns3_tx_burst_mode_get(struct rte_eth_dev *dev,
+			   __rte_unused uint16_t queue_id,
+			   struct rte_eth_burst_mode *mode);
 const uint32_t *hns3_dev_supported_ptypes_get(struct rte_eth_dev *dev);
+void hns3_init_rx_ptype_tble(struct rte_eth_dev *dev);
 void hns3_set_rxtx_function(struct rte_eth_dev *eth_dev);
 void hns3_set_queue_intr_gl(struct hns3_hw *hw, uint16_t queue_id,
 			    uint8_t gl_idx, uint16_t gl_value);
@@ -414,9 +662,19 @@ int hns3_set_fake_rx_or_tx_queues(struct rte_eth_dev *dev, uint16_t nb_rx_q,
 				  uint16_t nb_tx_q);
 int hns3_config_gro(struct hns3_hw *hw, bool en);
 int hns3_restore_gro_conf(struct hns3_hw *hw);
-void hns3_update_all_queues_pvid_state(struct hns3_hw *hw);
+void hns3_update_all_queues_pvid_proc_en(struct hns3_hw *hw);
+void hns3_rx_scattered_reset(struct rte_eth_dev *dev);
+void hns3_rx_scattered_calc(struct rte_eth_dev *dev);
+int hns3_rx_check_vec_support(struct rte_eth_dev *dev);
+int hns3_tx_check_vec_support(struct rte_eth_dev *dev);
+void hns3_rxq_vec_setup(struct hns3_rx_queue *rxq);
 void hns3_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 		       struct rte_eth_rxq_info *qinfo);
 void hns3_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 		       struct rte_eth_txq_info *qinfo);
+uint32_t hns3_get_tqp_reg_offset(uint16_t idx);
+int hns3_start_all_txqs(struct rte_eth_dev *dev);
+int hns3_start_all_rxqs(struct rte_eth_dev *dev);
+void hns3_stop_all_txqs(struct rte_eth_dev *dev);
+
 #endif /* _HNS3_RXTX_H_ */
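A closing usage note on the Rx burst variants declared above: the actual choice between them is made inside hns3_set_rxtx_function(), but conceptually it reduces to something like the following sketch (simplified and hypothetical; the real logic also weighs requested offloads, scattered Rx and build-time vector support):

/* Simplified sketch of Rx burst selection; not the driver's actual code. */
static eth_rx_burst_t
pick_rx_burst_sketch(struct rte_eth_dev *dev, bool scattered_rx)
{
	if (hns3_rx_check_vec_support(dev) == 0)
		return hns3_recv_pkts_vec;       /* NEON vector path */
	if (scattered_rx)
		return hns3_recv_scattered_pkts; /* multi-segment packets */
	return hns3_recv_pkts;                   /* scalar single-BD path */
}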