net/hns3: support Tx push quick doorbell for performance
[dpdk.git] / drivers / net / hns3 / hns3_ethdev_vf.c
index 1af947a..e582503 100644 (file)
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2018-2019 Hisilicon Limited.
+ * Copyright(c) 2018-2021 HiSilicon Limited.
  */
 
 #include <linux/pci_regs.h>
@@ -44,6 +44,9 @@ static int hns3vf_add_mc_mac_addr(struct hns3_hw *hw,
                                  struct rte_ether_addr *mac_addr);
 static int hns3vf_remove_mc_mac_addr(struct hns3_hw *hw,
                                     struct rte_ether_addr *mac_addr);
+static int hns3vf_dev_link_update(struct rte_eth_dev *eth_dev,
+                                  __rte_unused int wait_to_complete);
+
 /* set PCI bus mastering */
 static int
 hns3vf_set_bus_master(const struct rte_pci_device *device, bool op)
@@ -153,9 +156,12 @@ hns3vf_enable_msix(const struct rte_pci_device *device, bool op)
                if (ret < 0) {
                        PMD_INIT_LOG(ERR, "failed to write PCI offset 0x%x",
                                    (pos + PCI_MSIX_FLAGS));
+                       return -ENXIO;
                }
+
                return 0;
        }
+
        return -ENXIO;
 }
 
@@ -778,6 +784,7 @@ hns3vf_dev_configure(struct rte_eth_dev *dev)
        uint16_t nb_rx_q = dev->data->nb_rx_queues;
        uint16_t nb_tx_q = dev->data->nb_tx_queues;
        struct rte_eth_rss_conf rss_conf;
+       uint32_t max_rx_pkt_len;
        uint16_t mtu;
        bool gro_en;
        int ret;
@@ -794,13 +801,11 @@ hns3vf_dev_configure(struct rte_eth_dev *dev)
         * work as usual. But these fake queues are imperceptible, and can not
         * be used by upper applications.
         */
-       if (!hns3_dev_indep_txrx_supported(hw)) {
-               ret = hns3_set_fake_rx_or_tx_queues(dev, nb_rx_q, nb_tx_q);
-               if (ret) {
-                       hns3_err(hw, "fail to set Rx/Tx fake queues, ret = %d.",
-                                ret);
-                       return ret;
-               }
+       ret = hns3_set_fake_rx_or_tx_queues(dev, nb_rx_q, nb_tx_q);
+       if (ret) {
+               hns3_err(hw, "fail to set Rx/Tx fake queues, ret = %d.", ret);
+               hw->cfg_max_queues = 0;
+               return ret;
        }
 
        hw->adapter_state = HNS3_NIC_CONFIGURING;
@@ -825,12 +830,18 @@ hns3vf_dev_configure(struct rte_eth_dev *dev)
         * according to the maximum RX packet length.
         */
        if (conf->rxmode.offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) {
-               /*
-                * Security of max_rx_pkt_len is guaranteed in dpdk frame.
-                * Maximum value of max_rx_pkt_len is HNS3_MAX_FRAME_LEN, so it
-                * can safely assign to "uint16_t" type variable.
-                */
-               mtu = (uint16_t)HNS3_PKTLEN_TO_MTU(conf->rxmode.max_rx_pkt_len);
+               max_rx_pkt_len = conf->rxmode.max_rx_pkt_len;
+               if (max_rx_pkt_len > HNS3_MAX_FRAME_LEN ||
+                   max_rx_pkt_len <= HNS3_DEFAULT_FRAME_LEN) {
+                       hns3_err(hw, "maximum Rx packet length must be greater "
+                                "than %u and less than %u when jumbo frame enabled.",
+                                (uint16_t)HNS3_DEFAULT_FRAME_LEN,
+                                (uint16_t)HNS3_MAX_FRAME_LEN);
+                       ret = -EINVAL;
+                       goto cfg_err;
+               }
+
+               mtu = (uint16_t)HNS3_PKTLEN_TO_MTU(max_rx_pkt_len);
                ret = hns3vf_dev_mtu_set(dev, mtu);
                if (ret)
                        goto cfg_err;
@@ -847,17 +858,13 @@ hns3vf_dev_configure(struct rte_eth_dev *dev)
        if (ret)
                goto cfg_err;
 
-       hns->rx_simple_allowed = true;
-       hns->rx_vec_allowed = true;
-       hns->tx_simple_allowed = true;
-       hns->tx_vec_allowed = true;
-
        hns3_init_rx_ptype_tble(dev);
 
        hw->adapter_state = HNS3_NIC_CONFIGURED;
        return 0;
 
 cfg_err:
+       hw->cfg_max_queues = 0;
        (void)hns3_set_fake_rx_or_tx_queues(dev, 0, 0);
        hw->adapter_state = HNS3_NIC_INITIALIZED;
 
@@ -981,6 +988,9 @@ hns3vf_dev_infos_get(struct rte_eth_dev *eth_dev, struct rte_eth_dev_info *info)
                                 DEV_TX_OFFLOAD_MBUF_FAST_FREE |
                                 hns3_txvlan_cap_get(hw));
 
+       if (hns3_dev_outer_udp_cksum_supported(hw))
+               info->tx_offload_capa |= DEV_TX_OFFLOAD_OUTER_UDP_CKSUM;
+
        if (hns3_dev_indep_txrx_supported(hw))
                info->dev_capa = RTE_ETH_DEV_CAPA_RUNTIME_RX_QUEUE_SETUP |
                                 RTE_ETH_DEV_CAPA_RUNTIME_TX_QUEUE_SETUP;
@@ -1014,11 +1024,14 @@ hns3vf_dev_infos_get(struct rte_eth_dev *eth_dev, struct rte_eth_dev_info *info)
                .offloads = 0,
        };
 
-       info->vmdq_queue_num = 0;
-
-       info->reta_size = HNS3_RSS_IND_TBL_SIZE;
+       info->reta_size = hw->rss_ind_tbl_size;
        info->hash_key_size = HNS3_RSS_KEY_SIZE;
        info->flow_type_rss_offloads = HNS3_ETH_RSS_SUPPORT;
+
+       info->default_rxportconf.burst_size = HNS3_DEFAULT_PORT_CONF_BURST_SIZE;
+       info->default_txportconf.burst_size = HNS3_DEFAULT_PORT_CONF_BURST_SIZE;
+       info->default_rxportconf.nb_queues = HNS3_DEFAULT_PORT_CONF_QUEUES_NUM;
+       info->default_txportconf.nb_queues = HNS3_DEFAULT_PORT_CONF_QUEUES_NUM;
        info->default_rxportconf.ring_size = HNS3_DEFAULT_RING_DESC;
        info->default_txportconf.ring_size = HNS3_DEFAULT_RING_DESC;
 
@@ -1054,12 +1067,11 @@ hns3vf_check_event_cause(struct hns3_adapter *hns, uint32_t *clearval)
 
        /* Fetch the events from their corresponding regs */
        cmdq_stat_reg = hns3_read_dev(hw, HNS3_VECTOR0_CMDQ_STAT_REG);
-
        if (BIT(HNS3_VECTOR0_RST_INT_B) & cmdq_stat_reg) {
                rst_ing_reg = hns3_read_dev(hw, HNS3_FUN_RST_ING);
                hns3_warn(hw, "resetting reg: 0x%x", rst_ing_reg);
                hns3_atomic_set_bit(HNS3_VF_RESET, &hw->reset.pending);
-               rte_atomic16_set(&hw->reset.disable_cmd, 1);
+               __atomic_store_n(&hw->reset.disable_cmd, 1, __ATOMIC_RELAXED);
                val = hns3_read_dev(hw, HNS3_VF_RST_ING);
                hns3_write_dev(hw, HNS3_VF_RST_ING, val | HNS3_VF_RST_ING_BIT);
                val = cmdq_stat_reg & ~BIT(HNS3_VECTOR0_RST_INT_B);
@@ -1099,9 +1111,6 @@ hns3vf_interrupt_handler(void *param)
        enum hns3vf_evt_cause event_cause;
        uint32_t clearval;
 
-       if (hw->irq_thread_id == 0)
-               hw->irq_thread_id = pthread_self();
-
        /* Disable interrupt */
        hns3vf_disable_irq0(hw);
 
@@ -1148,6 +1157,20 @@ hns3vf_parse_dev_specifications(struct hns3_hw *hw, struct hns3_cmd_desc *desc)
        hw->intr.int_ql_max = rte_le_to_cpu_16(req0->intr_ql_max);
 }
 
+/*
+ * Validate the device specifications stored in hw.
+ *
+ * Only the RSS indirection table size is checked: it must be non-zero and
+ * must not exceed HNS3_RSS_IND_TBL_SIZE_MAX.
+ *
+ * Returns 0 when the specifications are usable, -EINVAL otherwise.
+ */
+static int
+hns3vf_check_dev_specifications(struct hns3_hw *hw)
+{
+       if (hw->rss_ind_tbl_size == 0 ||
+           hw->rss_ind_tbl_size > HNS3_RSS_IND_TBL_SIZE_MAX) {
+               /*
+                * The failure is returned to the caller, so log it as an
+                * error and describe both rejected cases (zero and too large).
+                */
+               hns3_err(hw, "the size of hash lookup table configured (%u) "
+                            "is invalid, it must be non-zero and not exceed "
+                            "the maximum(%u)", hw->rss_ind_tbl_size,
+                            HNS3_RSS_IND_TBL_SIZE_MAX);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
 static int
 hns3vf_query_dev_specifications(struct hns3_hw *hw)
 {
@@ -1168,7 +1191,66 @@ hns3vf_query_dev_specifications(struct hns3_hw *hw)
 
        hns3vf_parse_dev_specifications(hw, desc);
 
-       return 0;
+       return hns3vf_check_dev_specifications(hw);
+}
+
+/*
+ * Record whether the PF pushes link status change (LSC) notifications.
+ *
+ * The capability is only moved out of the 'unknown' state: if it has already
+ * been settled as supported or not_supported, the call has no effect.
+ *
+ * hw:        hardware context of the VF.
+ * supported: true to record 'supported', false to record 'not_supported'.
+ */
+void
+hns3vf_update_push_lsc_cap(struct hns3_hw *hw, bool supported)
+{
+       uint16_t val = supported ? HNS3_PF_PUSH_LSC_CAP_SUPPORTED :
+                                  HNS3_PF_PUSH_LSC_CAP_NOT_SUPPORTED;
+       uint16_t exp = HNS3_PF_PUSH_LSC_CAP_UNKNOWN;
+       struct hns3_vf *vf = HNS3_DEV_HW_TO_VF(hw);
+
+       /*
+        * pf_push_lsc_cap is written with atomic operations elsewhere, so the
+        * cheap pre-check must use an atomic load as well rather than a plain
+        * read. The compare-exchange below is what actually guarantees that
+        * only the 'unknown' state is overwritten.
+        */
+       if (__atomic_load_n(&vf->pf_push_lsc_cap, __ATOMIC_ACQUIRE) ==
+           HNS3_PF_PUSH_LSC_CAP_UNKNOWN)
+               __atomic_compare_exchange(&vf->pf_push_lsc_cap, &exp, &val, 0,
+                                         __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE);
+}
+
+/*
+ * Probe whether the PF pushes link status change (LSC) notifications.
+ *
+ * The capability is reset to 'unknown', a link status request is sent to the
+ * PF, and the state is then polled for up to
+ * HNS3_CHECK_PUSH_LSC_CAP_TIMEOUT_MS milliseconds until it leaves 'unknown'.
+ * If it is still 'unknown' after the wait it is settled as 'not_supported'.
+ * Unless the final state is 'supported', RTE_ETH_DEV_INTR_LSC is cleared
+ * from the port's dev_flags.
+ */
+static void
+hns3vf_get_push_lsc_cap(struct hns3_hw *hw)
+{
+#define HNS3_CHECK_PUSH_LSC_CAP_TIMEOUT_MS     500
+
+       struct rte_eth_dev *dev = &rte_eth_devices[hw->data->port_id];
+       int32_t remain_ms = HNS3_CHECK_PUSH_LSC_CAP_TIMEOUT_MS;
+       uint16_t val = HNS3_PF_PUSH_LSC_CAP_NOT_SUPPORTED;
+       uint16_t exp = HNS3_PF_PUSH_LSC_CAP_UNKNOWN;
+       struct hns3_vf *vf = HNS3_DEV_HW_TO_VF(hw);
+
+       __atomic_store_n(&vf->pf_push_lsc_cap, HNS3_PF_PUSH_LSC_CAP_UNKNOWN,
+                        __ATOMIC_RELEASE);
+
+       /* The send result is deliberately ignored; the wait below is bounded. */
+       (void)hns3_send_mbx_msg(hw, HNS3_MBX_GET_LINK_STATUS, 0, NULL, 0, false,
+                               NULL, 0);
+
+       while (remain_ms > 0) {
+               rte_delay_ms(HNS3_POLL_RESPONE_MS);
+               if (__atomic_load_n(&vf->pf_push_lsc_cap, __ATOMIC_ACQUIRE) !=
+                       HNS3_PF_PUSH_LSC_CAP_UNKNOWN)
+                       break;
+               /*
+                * Charge the interval actually slept against the budget, so
+                * the timeout stays in milliseconds whatever the poll
+                * interval is.
+                */
+               remain_ms -= HNS3_POLL_RESPONE_MS;
+       }
+
+       /*
+        * When the loop above exits, pf_push_lsc_cap is in one of three
+        * states: unknown (the PF did not answer), not_supported or
+        * supported. Settle it as 'not_supported' if it is still 'unknown'.
+        */
+       __atomic_compare_exchange(&vf->pf_push_lsc_cap, &exp, &val, 0,
+                                 __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE);
+
+       if (__atomic_load_n(&vf->pf_push_lsc_cap, __ATOMIC_ACQUIRE) ==
+               HNS3_PF_PUSH_LSC_CAP_SUPPORTED) {
+               hns3_info(hw, "detect PF support push link status change!");
+       } else {
+               /*
+                * The framework has already set the RTE_ETH_DEV_INTR_LSC bit
+                * because the driver declares RTE_PCI_DRV_INTR_LSC in
+                * drv_flags, so clear the RTE_ETH_DEV_INTR_LSC capability
+                * here.
+                */
+               dev->data->dev_flags &= ~RTE_ETH_DEV_INTR_LSC;
+       }
 }
 
 static int
@@ -1197,6 +1279,7 @@ hns3vf_get_capability(struct hns3_hw *hw)
                hw->intr.mapping_mode = HNS3_INTR_MAPPING_VEC_RSV_ONE;
                hw->intr.gl_unit = HNS3_INTR_COALESCE_GL_UINT_2US;
                hw->tso_mode = HNS3_TSO_SW_CAL_PSEUDO_H_CSUM;
+               hw->drop_stats_mode = HNS3_PKTS_DROP_STATS_MODE1;
                hw->min_tx_pkt_len = HNS3_HIP08_MIN_TX_PKT_LEN;
                hw->rss_info.ipv6_sctp_offload_supported = false;
                hw->promisc_mode = HNS3_UNLIMIT_PROMISC_MODE;
@@ -1214,6 +1297,7 @@ hns3vf_get_capability(struct hns3_hw *hw)
        hw->intr.mapping_mode = HNS3_INTR_MAPPING_VEC_ALL;
        hw->intr.gl_unit = HNS3_INTR_COALESCE_GL_UINT_1US;
        hw->tso_mode = HNS3_TSO_HW_CAL_PSEUDO_H_CSUM;
+       hw->drop_stats_mode = HNS3_PKTS_DROP_STATS_MODE2;
        hw->min_tx_pkt_len = HNS3_HIP09_MIN_TX_PKT_LEN;
        hw->rss_info.ipv6_sctp_offload_supported = true;
        hw->promisc_mode = HNS3_LIMIT_PROMISC_MODE;
@@ -1382,6 +1466,8 @@ hns3vf_get_configuration(struct hns3_hw *hw)
                return ret;
        }
 
+       hns3vf_get_push_lsc_cap(hw);
+
        /* Get queue configuration from PF */
        ret = hns3vf_get_queue_info(hw);
        if (ret)
@@ -1411,33 +1497,64 @@ hns3vf_set_tc_queue_mapping(struct hns3_adapter *hns, uint16_t nb_rx_q,
 {
        struct hns3_hw *hw = &hns->hw;
 
-       if (nb_rx_q < hw->num_tc) {
-               hns3_err(hw, "number of Rx queues(%u) is less than tcs(%u).",
-                        nb_rx_q, hw->num_tc);
-               return -EINVAL;
-       }
-
-       if (nb_tx_q < hw->num_tc) {
-               hns3_err(hw, "number of Tx queues(%u) is less than tcs(%u).",
-                        nb_tx_q, hw->num_tc);
-               return -EINVAL;
-       }
-
        return hns3_queue_to_tc_mapping(hw, nb_rx_q, nb_tx_q);
 }
 
 /*
  * Ask the PF for the current link status over the mailbox.
  *
  * Nothing is sent while a reset is in progress. Otherwise a request is sent
  * only when the PF is recorded as not pushing link status changes, or while
  * req_link_info_cnt is still positive; each successful send consumes one
  * count. A failed send is logged and leaves the count untouched.
  */
 static void
 hns3vf_request_link_info(struct hns3_hw *hw)
 {
-       uint8_t resp_msg;
+       struct hns3_vf *vf = HNS3_DEV_HW_TO_VF(hw);
+       bool send_req;
        int ret;
 
        if (__atomic_load_n(&hw->reset.resetting, __ATOMIC_RELAXED))
                return;
+
+       send_req = vf->pf_push_lsc_cap == HNS3_PF_PUSH_LSC_CAP_NOT_SUPPORTED ||
+                  vf->req_link_info_cnt > 0;
+       if (!send_req)
+               return;
+
        ret = hns3_send_mbx_msg(hw, HNS3_MBX_GET_LINK_STATUS, 0, NULL, 0, false,
-                               &resp_msg, sizeof(resp_msg));
-       if (ret)
-               hns3_err(hw, "Failed to fetch link status from PF: %d", ret);
+                               NULL, 0);
+       if (ret) {
+               hns3_err(hw, "failed to fetch link status, ret = %d", ret);
+               return;
+       }
+
+       if (vf->req_link_info_cnt > 0)
+               vf->req_link_info_cnt--;
+}
+
+/*
+ * Store a link state reported for this VF and refresh the ethdev link
+ * record from it.
+ *
+ * hw:          hardware context of the VF.
+ * link_status: new link status, copied into hw->mac.
+ * link_speed:  new link speed, copied into hw->mac.
+ * link_duplex: new duplex mode, copied into hw->mac.
+ *
+ * The update is ignored unless the polling job is running and the adapter
+ * is in the HNS3_NIC_STARTED state.
+ */
+void
+hns3vf_update_link_status(struct hns3_hw *hw, uint8_t link_status,
+                         uint32_t link_speed, uint8_t link_duplex)
+{
+       struct hns3_vf *vf = HNS3_DEV_HW_TO_VF(hw);
+       struct rte_eth_dev *eth_dev = &rte_eth_devices[hw->data->port_id];
+       struct hns3_mac *mac_info = &hw->mac;
+
+       /*
+        * The PF kernel driver may push a link status while the VF driver is
+        * resetting. The polling job is stopped during a reset and started
+        * again once the reset is done. When the polling job starts, the
+        * driver fetches the initial link status by sending a request to the
+        * PF kernel driver; from then on it can update the link status by
+        * processing the PF kernel driver's link status mailbox messages.
+        * Pushed updates are therefore dropped while the polling job is not
+        * running or the port is not started.
+        */
+       if (!__atomic_load_n(&vf->poll_job_started, __ATOMIC_RELAXED) ||
+           hw->adapter_state != HNS3_NIC_STARTED)
+               return;
+
+       mac_info->link_status = link_status;
+       mac_info->link_speed = link_speed;
+       mac_info->link_duplex = link_duplex;
+
+       if (hns3vf_dev_link_update(eth_dev, 0) != 0)
+               return;
+
+       /* Start LSE reporting only when the LSC interrupt is configured. */
+       if (eth_dev->data->dev_conf.intr_conf.lsc != 0)
+               hns3_start_report_lse(eth_dev);
 }
 
 static int
@@ -1491,7 +1608,8 @@ hns3vf_en_hw_strip_rxvtag(struct hns3_hw *hw, bool enable)
        ret = hns3_send_mbx_msg(hw, HNS3_MBX_SET_VLAN, HNS3_MBX_VLAN_RX_OFF_CFG,
                                &msg_data, sizeof(msg_data), false, NULL, 0);
        if (ret)
-               hns3_err(hw, "vf enable strip failed, ret =%d", ret);
+               hns3_err(hw, "vf %s strip failed, ret = %d.",
+                               enable ? "enable" : "disable", ret);
 
        return ret;
 }
@@ -1631,11 +1749,10 @@ hns3vf_keep_alive_handler(void *param)
        struct rte_eth_dev *eth_dev = (struct rte_eth_dev *)param;
        struct hns3_adapter *hns = eth_dev->data->dev_private;
        struct hns3_hw *hw = &hns->hw;
-       uint8_t respmsg;
        int ret;
 
        ret = hns3_send_mbx_msg(hw, HNS3_MBX_KEEP_ALIVE, 0, NULL, 0,
-                               false, &respmsg, sizeof(uint8_t));
+                               false, NULL, 0);
        if (ret)
                hns3_err(hw, "VF sends keeping alive cmd failed(=%d)",
                         ret);
@@ -1653,8 +1770,8 @@ hns3vf_service_handler(void *param)
 
        /*
         * The query link status and reset processing are executed in the
-        * interrupt thread.When the IMP reset occurs, IMP will not respond,
-        * and the query operation will time out after 30ms. In the case of
+        * interrupt thread. When the IMP reset occurs, IMP will not respond,
+        * and the query operation will timeout after 30ms. In the case of
         * multiple PF/VFs, each query failure timeout causes the IMP reset
         * interrupt to fail to respond within 100ms.
         * Before querying the link status, check whether there is a reset
@@ -1669,6 +1786,31 @@ hns3vf_service_handler(void *param)
                          eth_dev);
 }
 
+/*
+ * Mark the polling job as started and run the service handler once.
+ *
+ * When the PF is recorded as pushing link status changes, req_link_info_cnt
+ * is primed with HNS3_REQUEST_LINK_INFO_REMAINS_CNT beforehand.
+ */
+static void
+hns3vf_start_poll_job(struct rte_eth_dev *dev)
+{
+#define HNS3_REQUEST_LINK_INFO_REMAINS_CNT     3
+
+       struct hns3_vf *vf_priv = HNS3_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+       bool pf_pushes_lsc =
+               vf_priv->pf_push_lsc_cap == HNS3_PF_PUSH_LSC_CAP_SUPPORTED;
+
+       if (pf_pushes_lsc)
+               vf_priv->req_link_info_cnt = HNS3_REQUEST_LINK_INFO_REMAINS_CNT;
+
+       /* Set the flag before the handler runs. */
+       __atomic_store_n(&vf_priv->poll_job_started, 1, __ATOMIC_RELAXED);
+
+       hns3vf_service_handler(dev);
+}
+
+/*
+ * Cancel the pending hns3vf_service_handler alarm for this port, then mark
+ * the polling job as stopped.
+ */
+static void
+hns3vf_stop_poll_job(struct rte_eth_dev *dev)
+{
+       struct hns3_vf *vf_priv = HNS3_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+
+       /* Cancel first; the flag is cleared only after the alarm is gone. */
+       rte_eal_alarm_cancel(hns3vf_service_handler, dev);
+       __atomic_store_n(&vf_priv->poll_job_started, 0, __ATOMIC_RELAXED);
+}
+
 static int
 hns3_query_vf_resource(struct hns3_hw *hw)
 {
@@ -1737,12 +1879,6 @@ hns3vf_init_hardware(struct hns3_adapter *hns)
                goto err_init_hardware;
        }
 
-       ret = hns3vf_set_alive(hw, true);
-       if (ret) {
-               PMD_INIT_LOG(ERR, "Failed to VF send alive to PF: %d", ret);
-               goto err_init_hardware;
-       }
-
        return 0;
 
 err_init_hardware:
@@ -1785,6 +1921,8 @@ hns3vf_init_vf(struct rte_eth_dev *eth_dev)
                goto err_cmd_init;
        }
 
+       hns3_tx_push_init(eth_dev);
+
        /* Get VF resource */
        ret = hns3_query_vf_resource(hw);
        if (ret)
@@ -1816,6 +1954,13 @@ hns3vf_init_vf(struct rte_eth_dev *eth_dev)
        if (ret)
                goto err_get_config;
 
+       /* Clear the hardware statistics of the imissed registers. */
+       ret = hns3_update_imissed_stats(hw, true);
+       if (ret) {
+               hns3_err(hw, "clear imissed stats failed, ret = %d", ret);
+               goto err_set_tc_queue;
+       }
+
        ret = hns3vf_set_tc_queue_mapping(hns, hw->tqps_num, hw->tqps_num);
        if (ret) {
                PMD_INIT_LOG(ERR, "failed to set tc info, ret = %d.", ret);
@@ -1832,7 +1977,13 @@ hns3vf_init_vf(struct rte_eth_dev *eth_dev)
        if (ret)
                goto err_set_tc_queue;
 
-       hns3_set_default_rss_args(hw);
+       hns3_rss_set_default_args(hw);
+
+       ret = hns3vf_set_alive(hw, true);
+       if (ret) {
+               PMD_INIT_LOG(ERR, "Failed to VF send alive to PF: %d", ret);
+               goto err_set_tc_queue;
+       }
 
        return 0;
 
@@ -1885,7 +2036,18 @@ hns3vf_do_stop(struct hns3_adapter *hns)
 
        hw->mac.link_status = ETH_LINK_DOWN;
 
-       if (rte_atomic16_read(&hw->reset.disable_cmd) == 0) {
+       /*
+        * The "hns3vf_do_stop" function will also be called by .stop_service to
+        * prepare reset. At the time of global or IMP reset, the command cannot
+        * be sent to stop the tx/rx queues. The mbuf in Tx/Rx queues may be
+        * accessed during the reset process. So the mbuf can not be released
+        * during reset and is required to be released after the reset is
+        * completed.
+        */
+       if (__atomic_load_n(&hw->reset.resetting,  __ATOMIC_RELAXED) == 0)
+               hns3_dev_release_mbufs(hns);
+
+       if (__atomic_load_n(&hw->reset.disable_cmd, __ATOMIC_RELAXED) == 0) {
                hns3vf_configure_mac_addr(hns, true);
                ret = hns3_reset_all_tqps(hns);
                if (ret) {
@@ -1947,18 +2109,18 @@ hns3vf_dev_stop(struct rte_eth_dev *dev)
        /* Disable datapath on secondary process. */
        hns3_mp_req_stop_rxtx(dev);
        /* Prevent crashes when queues are still in use. */
-       rte_delay_ms(hw->tqps_num);
+       rte_delay_ms(hw->cfg_max_queues);
 
        rte_spinlock_lock(&hw->lock);
        if (__atomic_load_n(&hw->reset.resetting, __ATOMIC_RELAXED) == 0) {
                hns3_stop_tqps(hw);
                hns3vf_do_stop(hns);
                hns3vf_unmap_rx_interrupt(dev);
-               hns3_dev_release_mbufs(hns);
                hw->adapter_state = HNS3_NIC_CONFIGURED;
        }
        hns3_rx_scattered_reset(dev);
-       rte_eal_alarm_cancel(hns3vf_service_handler, dev);
+       hns3vf_stop_poll_job(dev);
+       hns3_stop_report_lse(dev);
        rte_spinlock_unlock(&hw->lock);
 
        return 0;
@@ -1971,8 +2133,11 @@ hns3vf_dev_close(struct rte_eth_dev *eth_dev)
        struct hns3_hw *hw = &hns->hw;
        int ret = 0;
 
-       if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+       if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+               rte_free(eth_dev->process_private);
+               eth_dev->process_private = NULL;
                return 0;
+       }
 
        if (hw->adapter_state == HNS3_NIC_STARTED)
                ret = hns3vf_dev_stop(eth_dev);
@@ -2012,8 +2177,11 @@ hns3vf_fw_version_get(struct rte_eth_dev *eth_dev, char *fw_version,
                                      HNS3_FW_VERSION_BYTE1_S),
                       hns3_get_field(version, HNS3_FW_VERSION_BYTE0_M,
                                      HNS3_FW_VERSION_BYTE0_S));
+       if (ret < 0)
+               return -EINVAL;
+
        ret += 1; /* add the size of '\0' */
-       if (fw_size < (uint32_t)ret)
+       if (fw_size < (size_t)ret)
                return ret;
        else
                return 0;
@@ -2039,13 +2207,18 @@ hns3vf_dev_link_update(struct rte_eth_dev *eth_dev,
        case ETH_SPEED_NUM_50G:
        case ETH_SPEED_NUM_100G:
        case ETH_SPEED_NUM_200G:
-               new_link.link_speed = mac->link_speed;
+               if (mac->link_status)
+                       new_link.link_speed = mac->link_speed;
                break;
        default:
-               new_link.link_speed = ETH_SPEED_NUM_100M;
+               if (mac->link_status)
+                       new_link.link_speed = ETH_SPEED_NUM_UNKNOWN;
                break;
        }
 
+       if (!mac->link_status)
+               new_link.link_speed = ETH_SPEED_NUM_NONE;
+
        new_link.link_duplex = mac->link_duplex;
        new_link.link_status = mac->link_status ? ETH_LINK_UP : ETH_LINK_DOWN;
        new_link.link_autoneg =
@@ -2066,6 +2239,8 @@ hns3vf_do_start(struct hns3_adapter *hns, bool reset_queue)
        if (ret)
                return ret;
 
+       hns3_enable_rxd_adv_layout(hw);
+
        ret = hns3_init_queues(hns, reset_queue);
        if (ret)
                hns3_err(hw, "failed to init queues, ret = %d.", ret);
@@ -2085,21 +2260,22 @@ hns3vf_map_rx_interrupt(struct rte_eth_dev *dev)
        uint16_t q_id;
        int ret;
 
-       if (dev->data->dev_conf.intr_conf.rxq == 0)
+       /*
+        * hns3 needs a separate interrupt to be used as event interrupt which
+        * could not be shared with task queue pair, so KERNEL drivers need
+        * support multiple interrupt vectors.
+        */
+       if (dev->data->dev_conf.intr_conf.rxq == 0 ||
+           !rte_intr_cap_multiple(intr_handle))
                return 0;
 
-       /* disable uio/vfio intr/eventfd mapping */
        rte_intr_disable(intr_handle);
+       intr_vector = hw->used_rx_queues;
+       /* It creates event fd for each intr vector when MSIX is used */
+       if (rte_intr_efd_enable(intr_handle, intr_vector))
+               return -EINVAL;
 
-       /* check and configure queue intr-vector mapping */
-       if (rte_intr_cap_multiple(intr_handle) ||
-           !RTE_ETH_DEV_SRIOV(dev).active) {
-               intr_vector = hw->used_rx_queues;
-               /* It creates event fd for each intr vector when MSIX is used */
-               if (rte_intr_efd_enable(intr_handle, intr_vector))
-                       return -EINVAL;
-       }
-       if (rte_intr_dp_is_en(intr_handle) && !intr_handle->intr_vec) {
+       if (intr_handle->intr_vec == NULL) {
                intr_handle->intr_vec =
                        rte_zmalloc("intr_vec",
                                    hw->used_rx_queues * sizeof(int), 0);
@@ -2115,28 +2291,26 @@ hns3vf_map_rx_interrupt(struct rte_eth_dev *dev)
                vec = RTE_INTR_VEC_RXTX_OFFSET;
                base = RTE_INTR_VEC_RXTX_OFFSET;
        }
-       if (rte_intr_dp_is_en(intr_handle)) {
-               for (q_id = 0; q_id < hw->used_rx_queues; q_id++) {
-                       ret = hns3vf_bind_ring_with_vector(hw, vec, true,
-                                                          HNS3_RING_TYPE_RX,
-                                                          q_id);
-                       if (ret)
-                               goto vf_bind_vector_error;
-                       intr_handle->intr_vec[q_id] = vec;
-                       if (vec < base + intr_handle->nb_efd - 1)
-                               vec++;
-               }
+
+       for (q_id = 0; q_id < hw->used_rx_queues; q_id++) {
+               ret = hns3vf_bind_ring_with_vector(hw, vec, true,
+                                                  HNS3_RING_TYPE_RX, q_id);
+               if (ret)
+                       goto vf_bind_vector_error;
+               intr_handle->intr_vec[q_id] = vec;
+               /*
+                * If there are not enough efds (e.g. not enough interrupts),
+                * the remaining queues will be bound to the last interrupt.
+                */
+               if (vec < base + intr_handle->nb_efd - 1)
+                       vec++;
        }
        rte_intr_enable(intr_handle);
        return 0;
 
 vf_bind_vector_error:
-       rte_intr_efd_disable(intr_handle);
-       if (intr_handle->intr_vec) {
-               free(intr_handle->intr_vec);
-               intr_handle->intr_vec = NULL;
-       }
-       return ret;
+       free(intr_handle->intr_vec);
+       intr_handle->intr_vec = NULL;
 vf_alloc_intr_vec_error:
        rte_intr_efd_disable(intr_handle);
        return ret;
@@ -2193,11 +2367,8 @@ hns3vf_dev_start(struct rte_eth_dev *dev)
                return ret;
        }
        ret = hns3vf_map_rx_interrupt(dev);
-       if (ret) {
-               hw->adapter_state = HNS3_NIC_CONFIGURED;
-               rte_spinlock_unlock(&hw->lock);
-               return ret;
-       }
+       if (ret)
+               goto map_rx_inter_err;
 
        /*
         * There are three register used to control the status of a TQP
@@ -2211,19 +2382,12 @@ hns3vf_dev_start(struct rte_eth_dev *dev)
         * status of queue in the dpdk framework.
         */
        ret = hns3_start_all_txqs(dev);
-       if (ret) {
-               hw->adapter_state = HNS3_NIC_CONFIGURED;
-               rte_spinlock_unlock(&hw->lock);
-               return ret;
-       }
+       if (ret)
+               goto map_rx_inter_err;
 
        ret = hns3_start_all_rxqs(dev);
-       if (ret) {
-               hns3_stop_all_txqs(dev);
-               hw->adapter_state = HNS3_NIC_CONFIGURED;
-               rte_spinlock_unlock(&hw->lock);
-               return ret;
-       }
+       if (ret)
+               goto start_all_rxqs_fail;
 
        hw->adapter_state = HNS3_NIC_STARTED;
        rte_spinlock_unlock(&hw->lock);
@@ -2231,19 +2395,26 @@ hns3vf_dev_start(struct rte_eth_dev *dev)
        hns3_rx_scattered_calc(dev);
        hns3_set_rxtx_function(dev);
        hns3_mp_req_start_rxtx(dev);
-       hns3vf_service_handler(dev);
 
        hns3vf_restore_filter(dev);
 
        /* Enable interrupt of all rx queues before enabling queues */
        hns3_dev_all_rx_queue_intr_enable(hw, true);
-
-       /*
-        * After finished the initialization, start all tqps to receive/transmit
-        * packets and refresh all queue status.
-        */
        hns3_start_tqps(hw);
 
+       if (dev->data->dev_conf.intr_conf.lsc != 0)
+               hns3vf_dev_link_update(dev, 0);
+       hns3vf_start_poll_job(dev);
+
+       return ret;
+
+start_all_rxqs_fail:
+       hns3_stop_all_txqs(dev);
+map_rx_inter_err:
+       (void)hns3vf_do_stop(hns);
+       hw->adapter_state = HNS3_NIC_CONFIGURED;
+       rte_spinlock_unlock(&hw->lock);
+
        return ret;
 }
 
@@ -2293,7 +2464,8 @@ hns3vf_is_reset_pending(struct hns3_adapter *hns)
        /* Check the registers to confirm whether there is reset pending */
        hns3vf_check_event_cause(hns, NULL);
        reset = hns3vf_get_reset_level(hw, &hw->reset.pending);
-       if (hw->reset.level != HNS3_NONE_RESET && hw->reset.level < reset) {
+       if (hw->reset.level != HNS3_NONE_RESET && reset != HNS3_NONE_RESET &&
+           hw->reset.level < reset) {
                hns3_warn(hw, "High level reset %d is pending", reset);
                return true;
        }
@@ -2331,7 +2503,7 @@ hns3vf_wait_hardware_ready(struct hns3_adapter *hns)
                hns3_warn(hw, "hardware is ready, delay 1 sec for PF reset complete");
                return -EAGAIN;
        } else if (wait_data->result == HNS3_WAIT_TIMEOUT) {
-               gettimeofday(&tv, NULL);
+               hns3_clock_gettime(&tv);
                hns3_warn(hw, "Reset step4 hardware not ready after reset time=%ld.%.6ld",
                          tv.tv_sec, tv.tv_usec);
                return -ETIME;
@@ -2341,7 +2513,7 @@ hns3vf_wait_hardware_ready(struct hns3_adapter *hns)
        wait_data->hns = hns;
        wait_data->check_completion = is_vf_reset_done;
        wait_data->end_ms = (uint64_t)HNS3VF_RESET_WAIT_CNT *
-                                     HNS3VF_RESET_WAIT_MS + get_timeofday_ms();
+                               HNS3VF_RESET_WAIT_MS + hns3_clock_gettime_ms();
        wait_data->interval = HNS3VF_RESET_WAIT_MS * USEC_PER_MSEC;
        wait_data->count = HNS3VF_RESET_WAIT_CNT;
        wait_data->result = HNS3_WAIT_REQUEST;
@@ -2353,15 +2525,17 @@ static int
 hns3vf_prepare_reset(struct hns3_adapter *hns)
 {
        struct hns3_hw *hw = &hns->hw;
-       int ret = 0;
+       int ret;
 
        if (hw->reset.level == HNS3_VF_FUNC_RESET) {
                ret = hns3_send_mbx_msg(hw, HNS3_MBX_RESET, 0, NULL,
                                        0, true, NULL, 0);
+               if (ret)
+                       return ret;
        }
-       rte_atomic16_set(&hw->reset.disable_cmd, 1);
+       __atomic_store_n(&hw->reset.disable_cmd, 1, __ATOMIC_RELAXED);
 
-       return ret;
+       return 0;
 }
 
 static int
@@ -2371,15 +2545,22 @@ hns3vf_stop_service(struct hns3_adapter *hns)
        struct rte_eth_dev *eth_dev;
 
        eth_dev = &rte_eth_devices[hw->data->port_id];
-       if (hw->adapter_state == HNS3_NIC_STARTED)
-               rte_eal_alarm_cancel(hns3vf_service_handler, eth_dev);
+       if (hw->adapter_state == HNS3_NIC_STARTED) {
+               /*
+                * Update the link status before calling hns3vf_stop_poll_job,
+                * because hns3vf_update_link_status does nothing once the
+                * polling job has been stopped.
+                */
+               hns3vf_update_link_status(hw, ETH_LINK_DOWN, hw->mac.link_speed,
+                                         hw->mac.link_duplex);
+               hns3vf_stop_poll_job(eth_dev);
+       }
        hw->mac.link_status = ETH_LINK_DOWN;
 
        hns3_set_rxtx_function(eth_dev);
        rte_wmb();
        /* Disable datapath on secondary process. */
        hns3_mp_req_stop_rxtx(eth_dev);
-       rte_delay_ms(hw->tqps_num);
+       rte_delay_ms(hw->cfg_max_queues);
 
        rte_spinlock_lock(&hw->lock);
        if (hw->adapter_state == HNS3_NIC_STARTED ||
@@ -2395,7 +2576,7 @@ hns3vf_stop_service(struct hns3_adapter *hns)
         * from table space. Hence, for function reset software intervention is
         * required to delete the entries.
         */
-       if (rte_atomic16_read(&hw->reset.disable_cmd) == 0)
+       if (__atomic_load_n(&hw->reset.disable_cmd, __ATOMIC_RELAXED) == 0)
                hns3vf_configure_all_mc_mac_addr(hns, true);
        rte_spinlock_unlock(&hw->lock);
 
@@ -2412,7 +2593,7 @@ hns3vf_start_service(struct hns3_adapter *hns)
        hns3_set_rxtx_function(eth_dev);
        hns3_mp_req_start_rxtx(eth_dev);
        if (hw->adapter_state == HNS3_NIC_STARTED) {
-               hns3vf_service_handler(eth_dev);
+               hns3vf_start_poll_job(eth_dev);
 
                /* Enable interrupt of all rx queues before enabling queues */
                hns3_dev_all_rx_queue_intr_enable(hw, true);
@@ -2517,6 +2698,13 @@ hns3vf_restore_conf(struct hns3_adapter *hns)
                hns3_info(hw, "hns3vf dev restart successful!");
        } else if (hw->adapter_state == HNS3_NIC_STOPPING)
                hw->adapter_state = HNS3_NIC_CONFIGURED;
+
+       ret = hns3vf_set_alive(hw, true);
+       if (ret) {
+               hns3_err(hw, "failed to VF send alive to PF: %d", ret);
+               goto err_vlan_table;
+       }
+
        return 0;
 
 err_vlan_table:
@@ -2567,8 +2755,10 @@ hns3vf_reset_service(void *param)
         * The interrupt may have been lost. It is necessary to handle
         * the interrupt to recover from the error.
         */
-       if (rte_atomic16_read(&hns->hw.reset.schedule) == SCHEDULE_DEFERRED) {
-               rte_atomic16_set(&hns->hw.reset.schedule, SCHEDULE_REQUESTED);
+       if (__atomic_load_n(&hw->reset.schedule, __ATOMIC_RELAXED) ==
+                           SCHEDULE_DEFERRED) {
+               __atomic_store_n(&hw->reset.schedule, SCHEDULE_REQUESTED,
+                                __ATOMIC_RELAXED);
                hns3_err(hw, "Handling interrupts in delayed tasks");
                hns3vf_interrupt_handler(&rte_eth_devices[hw->data->port_id]);
                reset_level = hns3vf_get_reset_level(hw, &hw->reset.pending);
@@ -2577,7 +2767,7 @@ hns3vf_reset_service(void *param)
                        hns3_atomic_set_bit(HNS3_VF_RESET, &hw->reset.pending);
                }
        }
-       rte_atomic16_set(&hns->hw.reset.schedule, SCHEDULE_NONE);
+       __atomic_store_n(&hw->reset.schedule, SCHEDULE_NONE, __ATOMIC_RELAXED);
 
        /*
         * Hardware reset has been notified, we now have to poll & check if
@@ -2585,14 +2775,13 @@ hns3vf_reset_service(void *param)
         */
        reset_level = hns3vf_get_reset_level(hw, &hw->reset.pending);
        if (reset_level != HNS3_NONE_RESET) {
-               gettimeofday(&tv_start, NULL);
+               hns3_clock_gettime(&tv_start);
                hns3_reset_process(hns, reset_level);
-               gettimeofday(&tv, NULL);
+               hns3_clock_gettime(&tv);
                timersub(&tv, &tv_start, &tv_delta);
-               msec = tv_delta.tv_sec * MSEC_PER_SEC +
-                      tv_delta.tv_usec / USEC_PER_MSEC;
+               msec = hns3_clock_calctime_ms(&tv_delta);
                if (msec > HNS3_RESET_PROCESS_MS)
-                       hns3_err(hw, "%d handle long time delta %" PRIx64
+                       hns3_err(hw, "%d handle long time delta %" PRIu64
                                 " ms time=%ld.%.6ld",
                                 hw->reset.level, msec, tv.tv_sec, tv.tv_usec);
        }
@@ -2693,11 +2882,12 @@ static const struct eth_dev_ops hns3vf_eth_dev_ops = {
        .rss_hash_conf_get  = hns3_dev_rss_hash_conf_get,
        .reta_update        = hns3_dev_rss_reta_update,
        .reta_query         = hns3_dev_rss_reta_query,
-       .filter_ctrl        = hns3_dev_filter_ctrl,
+       .flow_ops_get       = hns3_dev_flow_ops_get,
        .vlan_filter_set    = hns3vf_vlan_filter_set,
        .vlan_offload_set   = hns3vf_vlan_offload_set,
        .get_reg            = hns3_get_regs,
        .dev_supported_ptypes_get = hns3_dev_supported_ptypes_get,
+       .tx_done_cleanup    = hns3_tx_done_cleanup,
 };
 
 static const struct hns3_reset_ops hns3vf_reset_ops = {
@@ -2728,8 +2918,7 @@ hns3vf_dev_init(struct rte_eth_dev *eth_dev)
                return -ENOMEM;
        }
 
-       /* initialize flow filter lists */
-       hns3_filterlist_init(eth_dev);
+       hns3_flow_init(eth_dev);
 
        hns3_set_rxtx_function(eth_dev);
        eth_dev->dev_ops = &hns3vf_eth_dev_ops;
@@ -2741,8 +2930,8 @@ hns3vf_dev_init(struct rte_eth_dev *eth_dev)
                                          "process, ret = %d", ret);
                        goto err_mp_init_secondary;
                }
-
                hw->secondary_cnt++;
+               hns3_tx_push_init(eth_dev);
                return 0;
        }
 
@@ -2757,6 +2946,7 @@ hns3vf_dev_init(struct rte_eth_dev *eth_dev)
        hw->adapter_state = HNS3_NIC_UNINITIALIZED;
        hns->is_vf = true;
        hw->data = eth_dev->data;
+       hns3_parse_devargs(eth_dev);
 
        ret = hns3_reset_init(hw);
        if (ret)
@@ -2800,7 +2990,8 @@ hns3vf_dev_init(struct rte_eth_dev *eth_dev)
 
        hw->adapter_state = HNS3_NIC_INITIALIZED;
 
-       if (rte_atomic16_read(&hns->hw.reset.schedule) == SCHEDULE_PENDING) {
+       if (__atomic_load_n(&hw->reset.schedule, __ATOMIC_RELAXED) ==
+                           SCHEDULE_PENDING) {
                hns3_err(hw, "Reschedule reset service after dev_init");
                hns3_schedule_reset(hns);
        } else {
@@ -2824,8 +3015,10 @@ err_mp_init_primary:
 err_mp_init_secondary:
        eth_dev->dev_ops = NULL;
        eth_dev->rx_pkt_burst = NULL;
+       eth_dev->rx_descriptor_status = NULL;
        eth_dev->tx_pkt_burst = NULL;
        eth_dev->tx_pkt_prepare = NULL;
+       eth_dev->tx_descriptor_status = NULL;
        rte_free(eth_dev->process_private);
        eth_dev->process_private = NULL;
 
@@ -2840,8 +3033,11 @@ hns3vf_dev_uninit(struct rte_eth_dev *eth_dev)
 
        PMD_INIT_FUNC_TRACE();
 
-       if (rte_eal_process_type() != RTE_PROC_PRIMARY)
-               return -EPERM;
+       if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+               rte_free(eth_dev->process_private);
+               eth_dev->process_private = NULL;
+               return 0;
+       }
 
        if (hw->adapter_state < HNS3_NIC_CLOSING)
                hns3vf_dev_close(eth_dev);
@@ -2873,7 +3069,7 @@ static const struct rte_pci_id pci_id_hns3vf_map[] = {
 
 static struct rte_pci_driver rte_hns3vf_pmd = {
        .id_table = pci_id_hns3vf_map,
-       .drv_flags = RTE_PCI_DRV_NEED_MAPPING,
+       .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
        .probe = eth_hns3vf_pci_probe,
        .remove = eth_hns3vf_pci_remove,
 };
@@ -2881,3 +3077,7 @@ static struct rte_pci_driver rte_hns3vf_pmd = {
 RTE_PMD_REGISTER_PCI(net_hns3_vf, rte_hns3vf_pmd);
 RTE_PMD_REGISTER_PCI_TABLE(net_hns3_vf, pci_id_hns3vf_map);
 RTE_PMD_REGISTER_KMOD_DEP(net_hns3_vf, "* igb_uio | vfio-pci");
+RTE_PMD_REGISTER_PARAM_STRING(net_hns3_vf,
+               HNS3_DEVARG_RX_FUNC_HINT "=vec|sve|simple|common "
+               HNS3_DEVARG_TX_FUNC_HINT "=vec|sve|simple|common "
+               HNS3_DEVARG_DEV_CAPS_MASK "=<1-65535> ");