net/hns3: support Rx interrupt
diff --git a/drivers/net/hns3/hns3_ethdev_vf.c b/drivers/net/hns3/hns3_ethdev_vf.c
index 0f0fd8c..bd52e19 100644
--- a/drivers/net/hns3/hns3_ethdev_vf.c
+++ b/drivers/net/hns3/hns3_ethdev_vf.c
@@ -9,6 +9,8 @@
 #include <inttypes.h>
 #include <unistd.h>
 #include <arpa/inet.h>
+#include <linux/pci_regs.h>
+
 #include <rte_alarm.h>
 #include <rte_atomic.h>
 #include <rte_bus_pci.h>
@@ -24,6 +26,7 @@
 #include <rte_io.h>
 #include <rte_log.h>
 #include <rte_pci.h>
+#include <rte_vfio.h>
 
 #include "hns3_ethdev.h"
 #include "hns3_logs.h"
@@ -31,6 +34,7 @@
 #include "hns3_regs.h"
 #include "hns3_intr.h"
 #include "hns3_dcb.h"
+#include "hns3_mp.h"
 
 #define HNS3VF_KEEP_ALIVE_INTERVAL     2000000 /* us */
 #define HNS3VF_SERVICE_INTERVAL                1000000 /* us */
 #define HNS3VF_RESET_WAIT_MS   20
 #define HNS3VF_RESET_WAIT_CNT  2000
 
+/* Reset related register bits */
+#define HNS3_GLOBAL_RESET_BIT          0
+#define HNS3_CORE_RESET_BIT            1
+#define HNS3_IMP_RESET_BIT             2
+#define HNS3_FUN_RST_ING_B             0
+
 enum hns3vf_evt_cause {
        HNS3VF_VECTOR0_EVENT_RST,
        HNS3VF_VECTOR0_EVENT_MBX,
        HNS3VF_VECTOR0_EVENT_OTHER,
 };
 
+static enum hns3_reset_level hns3vf_get_reset_level(struct hns3_hw *hw,
+                                                   uint64_t *levels);
 static int hns3vf_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu);
 static int hns3vf_dev_configure_vlan(struct rte_eth_dev *dev);
 
+/* set PCI bus mastering */
+static void
+hns3vf_set_bus_master(const struct rte_pci_device *device, bool op)
+{
+       uint16_t reg;
+
+       rte_pci_read_config(device, &reg, sizeof(reg), PCI_COMMAND);
+
+       if (op)
+               /* set the master bit */
+               reg |= PCI_COMMAND_MASTER;
+       else
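+               /* clear the master bit */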
+               reg &= ~(PCI_COMMAND_MASTER);
+
+       rte_pci_write_config(device, &reg, sizeof(reg), PCI_COMMAND);
+}
+
+/**
+ * hns3vf_find_pci_capability - lookup a capability in the PCI capability list
+ * @device: the PCI device to search
+ * @cap: the capability ID to look for
+ *
+ * Return the offset of the given capability within the PCI configuration
+ * space, or 0 if the capability is not present.
+ */
+static int
+hns3vf_find_pci_capability(const struct rte_pci_device *device, int cap)
+{
+#define MAX_PCIE_CAPABILITY 48
+       uint16_t status;
+       uint8_t pos;
+       uint8_t id;
+       int ttl;
+
+       rte_pci_read_config(device, &status, sizeof(status), PCI_STATUS);
+       if (!(status & PCI_STATUS_CAP_LIST))
+               return 0;
+
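+       /* walk the capability linked list; ttl bounds the walk in case the
+        * list is malformed and loops
+        */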
+       ttl = MAX_PCIE_CAPABILITY;
+       rte_pci_read_config(device, &pos, sizeof(pos), PCI_CAPABILITY_LIST);
+       while (ttl-- && pos >= PCI_STD_HEADER_SIZEOF) {
+               rte_pci_read_config(device, &id, sizeof(id),
+                                   (pos + PCI_CAP_LIST_ID));
+
+               if (id == 0xFF)
+                       break;
+
+               if (id == cap)
+                       return (int)pos;
+
+               rte_pci_read_config(device, &pos, sizeof(pos),
+                                   (pos + PCI_CAP_LIST_NEXT));
+       }
+       return 0;
+}
+
+static int
+hns3vf_enable_msix(const struct rte_pci_device *device, bool op)
+{
+       uint16_t control;
+       int pos;
+
+       pos = hns3vf_find_pci_capability(device, PCI_CAP_ID_MSIX);
+       if (pos) {
+               rte_pci_read_config(device, &control, sizeof(control),
+                                   (pos + PCI_MSIX_FLAGS));
+               if (op)
+                       control |= PCI_MSIX_FLAGS_ENABLE;
+               else
+                       control &= ~PCI_MSIX_FLAGS_ENABLE;
+               rte_pci_write_config(device, &control, sizeof(control),
+                                    (pos + PCI_MSIX_FLAGS));
+               return 0;
+       }
+       return -1;
+}
+
 static int
 hns3vf_add_mac_addr(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr,
                    __attribute__ ((unused)) uint32_t idx,
@@ -427,6 +514,11 @@ hns3vf_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
                return -EBUSY;
        }
 
+       if (rte_atomic16_read(&hw->reset.resetting)) {
+               hns3_err(hw, "Failed to set mtu during reset");
+               return -EIO;
+       }
+
        rte_spinlock_lock(&hw->lock);
        ret = hns3vf_config_mtu(hw, mtu);
        if (ret) {
@@ -528,11 +620,32 @@ hns3vf_check_event_cause(struct hns3_adapter *hns, uint32_t *clearval)
        struct hns3_hw *hw = &hns->hw;
        enum hns3vf_evt_cause ret;
        uint32_t cmdq_stat_reg;
+       uint32_t rst_ing_reg;
        uint32_t val;
 
        /* Fetch the events from their corresponding regs */
        cmdq_stat_reg = hns3_read_dev(hw, HNS3_VECTOR0_CMDQ_STAT_REG);
 
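+       /* Check for vector0 reset event source */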
+       if (BIT(HNS3_VECTOR0_RST_INT_B) & cmdq_stat_reg) {
+               rst_ing_reg = hns3_read_dev(hw, HNS3_FUN_RST_ING);
+               hns3_warn(hw, "resetting reg: 0x%x", rst_ing_reg);
+               hns3_atomic_set_bit(HNS3_VF_RESET, &hw->reset.pending);
+               rte_atomic16_set(&hw->reset.disable_cmd, 1);
+               val = hns3_read_dev(hw, HNS3_VF_RST_ING);
+               hns3_write_dev(hw, HNS3_VF_RST_ING, val | HNS3_VF_RST_ING_BIT);
+               val = cmdq_stat_reg & ~BIT(HNS3_VECTOR0_RST_INT_B);
+               if (clearval) {
+                       hw->reset.stats.global_cnt++;
+                       hns3_warn(hw, "Global reset detected, clear reset status");
+               } else {
+                       hns3_schedule_delayed_reset(hns);
+                       hns3_warn(hw, "Global reset detected, don't clear reset status");
+               }
+
+               ret = HNS3VF_VECTOR0_EVENT_RST;
+               goto out;
+       }
+
        /* Check for vector0 mailbox(=CMDQ RX) event source */
        if (BIT(HNS3_VECTOR0_RX_CMDQ_INT_B) & cmdq_stat_reg) {
                val = cmdq_stat_reg & ~BIT(HNS3_VECTOR0_RX_CMDQ_INT_B);
@@ -567,6 +680,9 @@ hns3vf_interrupt_handler(void *param)
        event_cause = hns3vf_check_event_cause(hns, &clearval);
 
        switch (event_cause) {
+       case HNS3VF_VECTOR0_EVENT_RST:
+               hns3_schedule_reset(hns);
+               break;
        case HNS3VF_VECTOR0_EVENT_MBX:
                hns3_dev_handle_mbx_msg(hw);
                break;
@@ -742,6 +858,12 @@ hns3vf_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
        struct hns3_hw *hw = &hns->hw;
        int ret;
 
+       if (rte_atomic16_read(&hw->reset.resetting)) {
+               hns3_err(hw,
+                        "vf set vlan id failed during reset, vlan_id = %u",
+                        vlan_id);
+               return -EIO;
+       }
        rte_spinlock_lock(&hw->lock);
        ret = hns3vf_vlan_filter_configure(hns, vlan_id, on);
        rte_spinlock_unlock(&hw->lock);
@@ -789,6 +911,73 @@ hns3vf_vlan_offload_set(struct rte_eth_dev *dev, int mask)
        return 0;
 }
 
+static int
+hns3vf_handle_all_vlan_table(struct hns3_adapter *hns, int on)
+{
+       struct rte_vlan_filter_conf *vfc;
+       struct hns3_hw *hw = &hns->hw;
+       uint16_t vlan_id;
+       uint64_t vbit;
+       uint64_t ids;
+       int ret = 0;
+       uint32_t i;
+
+       vfc = &hw->data->vlan_filter_conf;
+       for (i = 0; i < RTE_DIM(vfc->ids); i++) {
+               if (vfc->ids[i] == 0)
+                       continue;
+               ids = vfc->ids[i];
+               while (ids) {
+                       /*
+                        * ids is a 64-bit bitmap: each set bit marks one
+                        * configured vlan id, so entry i covers vlan ids
+                        * 64 * i through 64 * i + 63.
+                        */
+                       vlan_id = 64 * i;
+                       /* mask of the zero bits below the lowest set bit */
+                       vbit = ~ids & (ids - 1);
+                       /* clear the lowest set bit, i.e. this vlan id */
+                       ids ^= (ids ^ (ids - 1)) ^ vbit;
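+                       /*
+                        * Count the trailing zeroes in vbit to get the bit
+                        * position, e.g. ids = 0x30 gives vbit = 0xf and four
+                        * shifts, i.e. vlan_id = 64 * i + 4.
+                        */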
+                       for (; vbit;) {
+                               vbit >>= 1;
+                               vlan_id++;
+                       }
+                       ret = hns3vf_vlan_filter_configure(hns, vlan_id, on);
+                       if (ret) {
+                               hns3_err(hw,
+                                        "VF handle vlan table failed, ret = %d, on = %d",
+                                        ret, on);
+                               return ret;
+                       }
+               }
+       }
+
+       return ret;
+}
+
+static int
+hns3vf_remove_all_vlan_table(struct hns3_adapter *hns)
+{
+       return hns3vf_handle_all_vlan_table(hns, 0);
+}
+
+static int
+hns3vf_restore_vlan_conf(struct hns3_adapter *hns)
+{
+       struct hns3_hw *hw = &hns->hw;
+       struct rte_eth_conf *dev_conf;
+       bool en;
+       int ret;
+
+       dev_conf = &hw->data->dev_conf;
+       en = dev_conf->rxmode.offloads & DEV_RX_OFFLOAD_VLAN_STRIP ? true
+                                                                  : false;
+       ret = hns3vf_en_hw_strip_rxvtag(hw, en);
+       if (ret)
+               hns3_err(hw, "VF restore vlan conf fail, en = %d, ret = %d", en,
+                        ret);
+       return ret;
+}
+
 static int
 hns3vf_dev_configure_vlan(struct rte_eth_dev *dev)
 {
@@ -848,7 +1037,19 @@ hns3vf_service_handler(void *param)
        struct hns3_adapter *hns = eth_dev->data->dev_private;
        struct hns3_hw *hw = &hns->hw;
 
-       hns3vf_request_link_info(hw);
+       /*
+        * Link status queries and reset processing are both executed in the
+        * interrupt thread. When an IMP reset occurs, the IMP stops
+        * responding and each query times out after 30ms; with multiple
+        * PFs/VFs, the accumulated query timeouts can prevent the IMP reset
+        * interrupt from being serviced within 100ms. So before querying the
+        * link status, check whether a reset is pending and, if so, abandon
+        * the query.
+        */
+       if (!hns3vf_is_reset_pending(hns))
+               hns3vf_request_link_info(hw);
+       else
+               hns3_warn(hw, "Cancel the query when reset is pending");
 
        rte_eal_alarm_set(HNS3VF_SERVICE_INTERVAL, hns3vf_service_handler,
                          eth_dev);
@@ -895,6 +1096,14 @@ err_init_hardware:
        return ret;
 }
 
+static int
+hns3vf_clear_vport_list(struct hns3_hw *hw)
+{
+       return hns3_send_mbx_msg(hw, HNS3_MBX_HANDLE_VF_TBL,
+                                HNS3_MBX_VPORT_LIST_CLEAR, NULL, 0, false,
+                                NULL, 0);
+}
+
 static int
 hns3vf_init_vf(struct rte_eth_dev *eth_dev)
 {
@@ -946,6 +1155,12 @@ hns3vf_init_vf(struct rte_eth_dev *eth_dev)
 
        rte_eth_random_addr(hw->mac.mac_addr); /* Generate a random mac addr */
 
+       ret = hns3vf_clear_vport_list(hw);
+       if (ret) {
+               PMD_INIT_LOG(ERR, "Failed to clear vport list: %d", ret);
+               goto err_get_config;
+       }
+
        ret = hns3vf_init_hardware(hns);
        if (ret)
                goto err_get_config;
@@ -993,34 +1208,111 @@ hns3vf_uninit_vf(struct rte_eth_dev *eth_dev)
        hw->io_base = NULL;
 }
 
+static int
+hns3vf_bind_ring_with_vector(struct rte_eth_dev *dev, uint8_t vector_id,
+                            bool mmap, uint16_t queue_id)
+{
+       struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       struct hns3_vf_bind_vector_msg bind_msg;
+       uint16_t code;
+       int ret;
+
+       memset(&bind_msg, 0, sizeof(bind_msg));
+       code = mmap ? HNS3_MBX_MAP_RING_TO_VECTOR :
+               HNS3_MBX_UNMAP_RING_TO_VECTOR;
+       bind_msg.vector_id = vector_id;
+       bind_msg.ring_num = 1;
+       bind_msg.param[0].ring_type = HNS3_RING_TYPE_RX;
+       bind_msg.param[0].tqp_index = queue_id;
+       bind_msg.param[0].int_gl_index = HNS3_RING_GL_RX;
+
+       ret = hns3_send_mbx_msg(hw, code, 0, (uint8_t *)&bind_msg,
+                               sizeof(bind_msg), false, NULL, 0);
+       if (ret) {
+               hns3_err(hw, "%s TQP %d fail, vector_id is %d, ret is %d.",
+                        mmap ? "Map" : "Unmap", queue_id, vector_id, ret);
+               return ret;
+       }
+
+       return 0;
+}
+
 static int
 hns3vf_do_stop(struct hns3_adapter *hns)
 {
        struct hns3_hw *hw = &hns->hw;
+       bool reset_queue;
 
        hw->mac.link_status = ETH_LINK_DOWN;
 
-       hns3vf_configure_mac_addr(hns, true);
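+       /*
+        * If the command queue has been disabled by a pending reset, the
+        * mailbox is unusable: skip unconfiguring the MAC addresses and do
+        * not ask the firmware to reset the queues.
+        */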
+       if (rte_atomic16_read(&hw->reset.disable_cmd) == 0) {
+               hns3vf_configure_mac_addr(hns, true);
+               reset_queue = true;
+       } else {
+               reset_queue = false;
+       }
+       return hns3_stop_queues(hns, reset_queue);
+}
 
-       return 0;
+static void
+hns3vf_unmap_rx_interrupt(struct rte_eth_dev *dev)
+{
+       struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
+       struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
+       uint8_t base = 0;
+       uint8_t vec = 0;
+       uint16_t q_id;
+
+       if (dev->data->dev_conf.intr_conf.rxq == 0)
+               return;
+
+       /* unmap the ring with vector */
+       if (rte_intr_allow_others(intr_handle)) {
+               vec = RTE_INTR_VEC_RXTX_OFFSET;
+               base = RTE_INTR_VEC_RXTX_OFFSET;
+       }
+       if (rte_intr_dp_is_en(intr_handle)) {
+               for (q_id = 0; q_id < dev->data->nb_rx_queues; q_id++) {
+                       (void)hns3vf_bind_ring_with_vector(dev, vec, false,
+                                                          q_id);
+                       if (vec < base + intr_handle->nb_efd - 1)
+                               vec++;
+               }
+       }
+       /* Clean datapath event and queue/vec mapping */
+       rte_intr_efd_disable(intr_handle);
+       if (intr_handle->intr_vec) {
+               rte_free(intr_handle->intr_vec);
+               intr_handle->intr_vec = NULL;
+       }
 }
 
 static void
-hns3vf_dev_stop(struct rte_eth_dev *eth_dev)
+hns3vf_dev_stop(struct rte_eth_dev *dev)
 {
-       struct hns3_adapter *hns = eth_dev->data->dev_private;
+       struct hns3_adapter *hns = dev->data->dev_private;
        struct hns3_hw *hw = &hns->hw;
 
        PMD_INIT_FUNC_TRACE();
 
        hw->adapter_state = HNS3_NIC_STOPPING;
-       hns3_set_rxtx_function(eth_dev);
+       hns3_set_rxtx_function(dev);
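+       /* ensure the updated burst function pointers are visible to all cores */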
+       rte_wmb();
+       /* Disable datapath on secondary process. */
+       hns3_mp_req_stop_rxtx(dev);
+       /* Prevent crashes when queues are still in use. */
+       rte_delay_ms(hw->tqps_num);
 
        rte_spinlock_lock(&hw->lock);
-       hns3vf_do_stop(hns);
-       hns3_dev_release_mbufs(hns);
-       hw->adapter_state = HNS3_NIC_CONFIGURED;
+       if (rte_atomic16_read(&hw->reset.resetting) == 0) {
+               hns3vf_do_stop(hns);
+               hns3_dev_release_mbufs(hns);
+               hw->adapter_state = HNS3_NIC_CONFIGURED;
+       }
+       rte_eal_alarm_cancel(hns3vf_service_handler, dev);
        rte_spinlock_unlock(&hw->lock);
+
+       hns3vf_unmap_rx_interrupt(dev);
 }
 
 static void
@@ -1029,18 +1321,24 @@ hns3vf_dev_close(struct rte_eth_dev *eth_dev)
        struct hns3_adapter *hns = eth_dev->data->dev_private;
        struct hns3_hw *hw = &hns->hw;
 
+       if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+               return;
+
        if (hw->adapter_state == HNS3_NIC_STARTED)
                hns3vf_dev_stop(eth_dev);
 
        hw->adapter_state = HNS3_NIC_CLOSING;
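+       /* abort any reset still in progress before tearing down the port */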
+       hns3_reset_abort(hns);
+       hw->adapter_state = HNS3_NIC_CLOSED;
        rte_eal_alarm_cancel(hns3vf_keep_alive_handler, eth_dev);
-       rte_eal_alarm_cancel(hns3vf_service_handler, eth_dev);
        hns3vf_configure_all_mc_mac_addr(hns, true);
+       hns3vf_remove_all_vlan_table(hns);
        hns3vf_uninit_vf(eth_dev);
        hns3_free_all_queues(eth_dev);
+       rte_free(hw->reset.wait_data);
        rte_free(eth_dev->process_private);
        eth_dev->process_private = NULL;
-       hw->adapter_state = HNS3_NIC_CLOSED;
+       hns3_mp_uninit_primary();
        hns3_warn(hw, "Close port %d finished", hw->data->port_id);
 }
 
@@ -1053,8 +1351,6 @@ hns3vf_dev_link_update(struct rte_eth_dev *eth_dev,
        struct hns3_mac *mac = &hw->mac;
        struct rte_eth_link new_link;
 
-       hns3vf_request_link_info(hw);
-
        memset(&new_link, 0, sizeof(new_link));
        switch (mac->link_speed) {
        case ETH_SPEED_NUM_10M:
@@ -1098,13 +1394,84 @@ hns3vf_do_start(struct hns3_adapter *hns, bool reset_queue)
 }
 
 static int
-hns3vf_dev_start(struct rte_eth_dev *eth_dev)
+hns3vf_map_rx_interrupt(struct rte_eth_dev *dev)
 {
-       struct hns3_adapter *hns = eth_dev->data->dev_private;
-       struct hns3_hw *hw = &hns->hw;
+       struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
+       struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
+       struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       uint32_t intr_vector;
+       uint8_t base = 0;
+       uint8_t vec = 0;
+       uint16_t q_id;
        int ret;
 
+       if (dev->data->dev_conf.intr_conf.rxq == 0)
+               return 0;
+
+       /* disable uio/vfio intr/eventfd mapping */
+       rte_intr_disable(intr_handle);
+
+       /* check and configure queue intr-vector mapping */
+       if (rte_intr_cap_multiple(intr_handle) ||
+               !RTE_ETH_DEV_SRIOV(dev).active) {
+               intr_vector = dev->data->nb_rx_queues;
+               /* It creates event fd for each intr vector when MSIX is used */
+               if (rte_intr_efd_enable(intr_handle, intr_vector))
+                       return -EINVAL;
+       }
+       if (rte_intr_dp_is_en(intr_handle) && !intr_handle->intr_vec) {
+               intr_handle->intr_vec =
+                       rte_zmalloc("intr_vec",
+                                   dev->data->nb_rx_queues * sizeof(int), 0);
+               if (intr_handle->intr_vec == NULL) {
+                       hns3_err(hw, "Failed to allocate %d rx_queues"
+                                    " intr_vec", dev->data->nb_rx_queues);
+                       ret = -ENOMEM;
+                       goto vf_alloc_intr_vec_error;
+               }
+       }
+
+       if (rte_intr_allow_others(intr_handle)) {
+               vec = RTE_INTR_VEC_RXTX_OFFSET;
+               base = RTE_INTR_VEC_RXTX_OFFSET;
+       }
+       if (rte_intr_dp_is_en(intr_handle)) {
+               for (q_id = 0; q_id < dev->data->nb_rx_queues; q_id++) {
+                       ret = hns3vf_bind_ring_with_vector(dev, vec, true,
+                                                          q_id);
+                       if (ret)
+                               goto vf_bind_vector_error;
+                       intr_handle->intr_vec[q_id] = vec;
+                       if (vec < base + intr_handle->nb_efd - 1)
+                               vec++;
+               }
+       }
+       rte_intr_enable(intr_handle);
+       return 0;
+
+vf_bind_vector_error:
+       rte_intr_efd_disable(intr_handle);
+       if (intr_handle->intr_vec) {
+               rte_free(intr_handle->intr_vec);
+               intr_handle->intr_vec = NULL;
+       }
+       return ret;
+vf_alloc_intr_vec_error:
+       rte_intr_efd_disable(intr_handle);
+       return ret;
+}
+
+static int
+hns3vf_dev_start(struct rte_eth_dev *dev)
+{
+       struct hns3_adapter *hns = dev->data->dev_private;
+       struct hns3_hw *hw = &hns->hw;
+       int ret = 0;
+
        PMD_INIT_FUNC_TRACE();
+       if (rte_atomic16_read(&hw->reset.resetting))
+               return -EBUSY;
+
        rte_spinlock_lock(&hw->lock);
        hw->adapter_state = HNS3_NIC_STARTING;
        ret = hns3vf_do_start(hns, true);
@@ -1115,10 +1482,329 @@ hns3vf_dev_start(struct rte_eth_dev *eth_dev)
        }
        hw->adapter_state = HNS3_NIC_STARTED;
        rte_spinlock_unlock(&hw->lock);
+
+       ret = hns3vf_map_rx_interrupt(dev);
+       if (ret)
+               return ret;
+       hns3_set_rxtx_function(dev);
+       hns3_mp_req_start_rxtx(dev);
+       rte_eal_alarm_set(HNS3VF_SERVICE_INTERVAL, hns3vf_service_handler, dev);
+       return ret;
+}
+
+static bool
+is_vf_reset_done(struct hns3_hw *hw)
+{
+#define HNS3_FUN_RST_ING_BITS \
+       (BIT(HNS3_VECTOR0_GLOBALRESET_INT_B) | \
+        BIT(HNS3_VECTOR0_CORERESET_INT_B) | \
+        BIT(HNS3_VECTOR0_IMPRESET_INT_B) | \
+        BIT(HNS3_VECTOR0_FUNCRESET_INT_B))
+
+       uint32_t val;
+
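+       /* a VF reset is tracked in VF_RST_ING; other levels in FUN_RST_ING */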
+       if (hw->reset.level == HNS3_VF_RESET) {
+               val = hns3_read_dev(hw, HNS3_VF_RST_ING);
+               if (val & HNS3_VF_RST_ING_BIT)
+                       return false;
+       } else {
+               val = hns3_read_dev(hw, HNS3_FUN_RST_ING);
+               if (val & HNS3_FUN_RST_ING_BITS)
+                       return false;
+       }
+       return true;
+}
+
+bool
+hns3vf_is_reset_pending(struct hns3_adapter *hns)
+{
+       struct hns3_hw *hw = &hns->hw;
+       enum hns3_reset_level reset;
+
+       hns3vf_check_event_cause(hns, NULL);
+       reset = hns3vf_get_reset_level(hw, &hw->reset.pending);
+       if (hw->reset.level != HNS3_NONE_RESET && hw->reset.level < reset) {
+               hns3_warn(hw, "High level reset %d is pending", reset);
+               return true;
+       }
+       return false;
+}
+
+static int
+hns3vf_wait_hardware_ready(struct hns3_adapter *hns)
+{
+       struct hns3_hw *hw = &hns->hw;
+       struct hns3_wait_data *wait_data = hw->reset.wait_data;
+       struct timeval tv;
+
+       if (wait_data->result == HNS3_WAIT_SUCCESS) {
+               /*
+                * Even after the vf reset is ready, the PF may not have
+                * completed its own reset processing, and a mbox sent to the
+                * PF during the pf reset can fail, so it is better to add an
+                * extra delay.
+                */
+               if (hw->reset.level == HNS3_VF_FUNC_RESET ||
+                   hw->reset.level == HNS3_FLR_RESET)
+                       return 0;
+               /* Reset retry process, no need to add extra delay. */
+               if (hw->reset.attempts)
+                       return 0;
+               if (wait_data->check_completion == NULL)
+                       return 0;
+
+               wait_data->check_completion = NULL;
+               wait_data->interval = 1 * MSEC_PER_SEC * USEC_PER_MSEC;
+               wait_data->count = 1;
+               wait_data->result = HNS3_WAIT_REQUEST;
+               rte_eal_alarm_set(wait_data->interval, hns3_wait_callback,
+                                 wait_data);
+               hns3_warn(hw, "hardware is ready, delay 1 sec for PF reset to complete");
+               return -EAGAIN;
+       } else if (wait_data->result == HNS3_WAIT_TIMEOUT) {
+               gettimeofday(&tv, NULL);
+               hns3_warn(hw, "Reset step4 hardware not ready after reset time=%ld.%.6ld",
+                         tv.tv_sec, tv.tv_usec);
+               return -ETIME;
+       } else if (wait_data->result == HNS3_WAIT_REQUEST)
+               return -EAGAIN;
+
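+       /*
+        * First pass: arm the alarm that polls is_vf_reset_done() every
+        * HNS3VF_RESET_WAIT_MS until the hardware reports ready or
+        * HNS3VF_RESET_WAIT_CNT polls have elapsed.
+        */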
+       wait_data->hns = hns;
+       wait_data->check_completion = is_vf_reset_done;
+       wait_data->end_ms = (uint64_t)HNS3VF_RESET_WAIT_CNT *
+                                     HNS3VF_RESET_WAIT_MS + get_timeofday_ms();
+       wait_data->interval = HNS3VF_RESET_WAIT_MS * USEC_PER_MSEC;
+       wait_data->count = HNS3VF_RESET_WAIT_CNT;
+       wait_data->result = HNS3_WAIT_REQUEST;
+       rte_eal_alarm_set(wait_data->interval, hns3_wait_callback, wait_data);
+       return -EAGAIN;
+}
+
+static int
+hns3vf_prepare_reset(struct hns3_adapter *hns)
+{
+       struct hns3_hw *hw = &hns->hw;
+       int ret = 0;
+
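+       /* a VF function reset is requested from the PF through the mailbox */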
+       if (hw->reset.level == HNS3_VF_FUNC_RESET) {
+               ret = hns3_send_mbx_msg(hw, HNS3_MBX_RESET, 0, NULL,
+                                       0, true, NULL, 0);
+       }
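+       /* the command queue must not be used while the reset is in progress */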
+       rte_atomic16_set(&hw->reset.disable_cmd, 1);
+
+       return ret;
+}
+
+static int
+hns3vf_stop_service(struct hns3_adapter *hns)
+{
+       struct hns3_hw *hw = &hns->hw;
+       struct rte_eth_dev *eth_dev;
+
+       eth_dev = &rte_eth_devices[hw->data->port_id];
+       rte_eal_alarm_cancel(hns3vf_service_handler, eth_dev);
+       hw->mac.link_status = ETH_LINK_DOWN;
+
+       hns3_set_rxtx_function(eth_dev);
+       rte_wmb();
+       /* Disable datapath on secondary process. */
+       hns3_mp_req_stop_rxtx(eth_dev);
+       rte_delay_ms(hw->tqps_num);
+
+       rte_spinlock_lock(&hw->lock);
+       if (hw->adapter_state == HNS3_NIC_STARTED ||
+           hw->adapter_state == HNS3_NIC_STOPPING) {
+               hns3vf_do_stop(hns);
+               hw->reset.mbuf_deferred_free = true;
+       } else {
+               hw->reset.mbuf_deferred_free = false;
+       }
+
+       /*
+        * It is cumbersome for hardware to pick-and-choose entries for
+        * deletion from the table space. Hence, for a function reset,
+        * software intervention is required to delete the entries.
+        */
+       if (rte_atomic16_read(&hw->reset.disable_cmd) == 0)
+               hns3vf_configure_all_mc_mac_addr(hns, true);
+       rte_spinlock_unlock(&hw->lock);
+
+       return 0;
+}
+
+static int
+hns3vf_start_service(struct hns3_adapter *hns)
+{
+       struct hns3_hw *hw = &hns->hw;
+       struct rte_eth_dev *eth_dev;
+
+       eth_dev = &rte_eth_devices[hw->data->port_id];
        hns3_set_rxtx_function(eth_dev);
+       hns3_mp_req_start_rxtx(eth_dev);
+
+       hns3vf_service_handler(eth_dev);
        return 0;
 }
 
+static int
+hns3vf_restore_conf(struct hns3_adapter *hns)
+{
+       struct hns3_hw *hw = &hns->hw;
+       int ret;
+
+       ret = hns3vf_configure_mac_addr(hns, false);
+       if (ret)
+               return ret;
+
+       ret = hns3vf_configure_all_mc_mac_addr(hns, false);
+       if (ret)
+               goto err_mc_mac;
+
+       ret = hns3vf_restore_vlan_conf(hns);
+       if (ret)
+               goto err_vlan_table;
+
+       if (hw->adapter_state == HNS3_NIC_STARTED) {
+               ret = hns3vf_do_start(hns, false);
+               if (ret)
+                       goto err_vlan_table;
+               hns3_info(hw, "hns3vf dev restart successful!");
+       } else if (hw->adapter_state == HNS3_NIC_STOPPING) {
+               hw->adapter_state = HNS3_NIC_CONFIGURED;
+       }
+       return 0;
+
+err_vlan_table:
+       hns3vf_configure_all_mc_mac_addr(hns, true);
+err_mc_mac:
+       hns3vf_configure_mac_addr(hns, true);
+       return ret;
+}
+
+static enum hns3_reset_level
+hns3vf_get_reset_level(struct hns3_hw *hw, uint64_t *levels)
+{
+       enum hns3_reset_level reset_level;
+
+       /* return the highest priority reset level amongst all */
+       if (hns3_atomic_test_bit(HNS3_VF_RESET, levels))
+               reset_level = HNS3_VF_RESET;
+       else if (hns3_atomic_test_bit(HNS3_VF_FULL_RESET, levels))
+               reset_level = HNS3_VF_FULL_RESET;
+       else if (hns3_atomic_test_bit(HNS3_VF_PF_FUNC_RESET, levels))
+               reset_level = HNS3_VF_PF_FUNC_RESET;
+       else if (hns3_atomic_test_bit(HNS3_VF_FUNC_RESET, levels))
+               reset_level = HNS3_VF_FUNC_RESET;
+       else if (hns3_atomic_test_bit(HNS3_FLR_RESET, levels))
+               reset_level = HNS3_FLR_RESET;
+       else
+               reset_level = HNS3_NONE_RESET;
+
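+       /* never downgrade: a lower-level reset must not preempt one in progress */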
+       if (hw->reset.level != HNS3_NONE_RESET && reset_level < hw->reset.level)
+               return HNS3_NONE_RESET;
+
+       return reset_level;
+}
+
+static void
+hns3vf_reset_service(void *param)
+{
+       struct hns3_adapter *hns = (struct hns3_adapter *)param;
+       struct hns3_hw *hw = &hns->hw;
+       enum hns3_reset_level reset_level;
+       struct timeval tv_delta;
+       struct timeval tv_start;
+       struct timeval tv;
+       uint64_t msec;
+
+       /*
+        * If the reset interrupt was not triggered within the deferral
+        * period, it may have been lost, so handle it here to recover from
+        * the error.
+        */
+       if (rte_atomic16_read(&hns->hw.reset.schedule) == SCHEDULE_DEFERRED) {
+               rte_atomic16_set(&hns->hw.reset.schedule, SCHEDULE_REQUESTED);
+               hns3_err(hw, "Handling interrupts in delayed tasks");
+               hns3vf_interrupt_handler(&rte_eth_devices[hw->data->port_id]);
+               reset_level = hns3vf_get_reset_level(hw, &hw->reset.pending);
+               if (reset_level == HNS3_NONE_RESET) {
+                       hns3_err(hw, "No reset level is set, try vf reset");
+                       hns3_atomic_set_bit(HNS3_VF_RESET, &hw->reset.pending);
+               }
+       }
+       rte_atomic16_set(&hns->hw.reset.schedule, SCHEDULE_NONE);
+
+       /*
+        * Hardware reset has been notified, we now have to poll & check if
+        * hardware has actually completed the reset sequence.
+        */
+       reset_level = hns3vf_get_reset_level(hw, &hw->reset.pending);
+       if (reset_level != HNS3_NONE_RESET) {
+               gettimeofday(&tv_start, NULL);
+               hns3_reset_process(hns, reset_level);
+               gettimeofday(&tv, NULL);
+               timersub(&tv, &tv_start, &tv_delta);
+               msec = tv_delta.tv_sec * MSEC_PER_SEC +
+                      tv_delta.tv_usec / USEC_PER_MSEC;
+               if (msec > HNS3_RESET_PROCESS_MS)
+                       hns3_err(hw, "%d handle long time delta %" PRIu64
+                                " ms time=%ld.%.6ld",
+                                hw->reset.level, msec, tv.tv_sec, tv.tv_usec);
+       }
+}
+
+static int
+hns3vf_reinit_dev(struct hns3_adapter *hns)
+{
+       struct rte_eth_dev *eth_dev = &rte_eth_devices[hns->hw.data->port_id];
+       struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
+       struct hns3_hw *hw = &hns->hw;
+       int ret;
+
+       if (hw->reset.level == HNS3_VF_FULL_RESET) {
+               rte_intr_disable(&pci_dev->intr_handle);
+               hns3vf_set_bus_master(pci_dev, true);
+       }
+
+       /* Firmware command initialize */
+       ret = hns3_cmd_init(hw);
+       if (ret) {
+               hns3_err(hw, "Failed to init cmd: %d", ret);
+               goto err_cmd_init;
+       }
+
+       if (hw->reset.level == HNS3_VF_FULL_RESET) {
+               /*
+                * UIO enables msix by writing the pcie configuration space
+                * directly, while vfio_pci enables msix in rte_intr_enable.
+                */
+               if (pci_dev->kdrv == RTE_KDRV_IGB_UIO ||
+                   pci_dev->kdrv == RTE_KDRV_UIO_GENERIC) {
+                       if (hns3vf_enable_msix(pci_dev, true))
+                               hns3_err(hw, "Failed to enable msix");
+               }
+
+               rte_intr_enable(&pci_dev->intr_handle);
+       }
+
+       ret = hns3_reset_all_queues(hns);
+       if (ret) {
+               hns3_err(hw, "Failed to reset all queues: %d", ret);
+               goto err_init;
+       }
+
+       ret = hns3vf_init_hardware(hns);
+       if (ret) {
+               hns3_err(hw, "Failed to init hardware: %d", ret);
+               goto err_init;
+       }
+
+       return 0;
+
+err_cmd_init:
+       hns3vf_set_bus_master(pci_dev, false);
+err_init:
+       hns3_cmd_uninit(hw);
+       return ret;
+}
+
 static const struct eth_dev_ops hns3vf_eth_dev_ops = {
        .dev_start          = hns3vf_dev_start,
        .dev_stop           = hns3vf_dev_stop,
@@ -1136,6 +1822,8 @@ static const struct eth_dev_ops hns3vf_eth_dev_ops = {
        .tx_queue_setup     = hns3_tx_queue_setup,
        .rx_queue_release   = hns3_dev_rx_queue_release,
        .tx_queue_release   = hns3_dev_tx_queue_release,
+       .rx_queue_intr_enable   = hns3_dev_rx_queue_intr_enable,
+       .rx_queue_intr_disable  = hns3_dev_rx_queue_intr_disable,
        .dev_configure      = hns3vf_dev_configure,
        .mac_addr_add       = hns3vf_add_mac_addr,
        .mac_addr_remove    = hns3vf_remove_mac_addr,
@@ -1153,6 +1841,16 @@ static const struct eth_dev_ops hns3vf_eth_dev_ops = {
        .dev_supported_ptypes_get = hns3_dev_supported_ptypes_get,
 };
 
+static const struct hns3_reset_ops hns3vf_reset_ops = {
+       .reset_service       = hns3vf_reset_service,
+       .stop_service        = hns3vf_stop_service,
+       .prepare_reset       = hns3vf_prepare_reset,
+       .wait_hardware_ready = hns3vf_wait_hardware_ready,
+       .reinit_dev          = hns3vf_reinit_dev,
+       .restore_conf        = hns3vf_restore_conf,
+       .start_service       = hns3vf_start_service,
+};
+
 static int
 hns3vf_dev_init(struct rte_eth_dev *eth_dev)
 {
@@ -1176,13 +1874,23 @@ hns3vf_dev_init(struct rte_eth_dev *eth_dev)
 
        hns3_set_rxtx_function(eth_dev);
        eth_dev->dev_ops = &hns3vf_eth_dev_ops;
-       if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+       if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+               hns3_mp_init_secondary();
+               hw->secondary_cnt++;
                return 0;
+       }
+
+       hns3_mp_init_primary();
 
        hw->adapter_state = HNS3_NIC_UNINITIALIZED;
        hns->is_vf = true;
        hw->data = eth_dev->data;
 
+       ret = hns3_reset_init(hw);
+       if (ret)
+               goto err_init_reset;
+       hw->reset.ops = &hns3vf_reset_ops;
+
        ret = hns3vf_init_vf(eth_dev);
        if (ret) {
                PMD_INIT_LOG(ERR, "Failed to init vf: %d", ret);
@@ -1211,16 +1919,24 @@ hns3vf_dev_init(struct rte_eth_dev *eth_dev)
         */
        eth_dev->data->dev_flags |= RTE_ETH_DEV_CLOSE_REMOVE;
 
+       if (rte_atomic16_read(&hns->hw.reset.schedule) == SCHEDULE_PENDING) {
+               hns3_err(hw, "Reschedule reset service after dev_init");
+               hns3_schedule_reset(hns);
+       } else {
+               /* the IMP will wait for the ready flag before resetting */
+               hns3_notify_reset_ready(hw, false);
+       }
        rte_eal_alarm_set(HNS3VF_KEEP_ALIVE_INTERVAL, hns3vf_keep_alive_handler,
                          eth_dev);
-       rte_eal_alarm_set(HNS3VF_SERVICE_INTERVAL, hns3vf_service_handler,
-                         eth_dev);
        return 0;
 
 err_rte_zmalloc:
        hns3vf_uninit_vf(eth_dev);
 
 err_init_vf:
+       rte_free(hw->reset.wait_data);
+
+err_init_reset:
        eth_dev->dev_ops = NULL;
        eth_dev->rx_pkt_burst = NULL;
        eth_dev->tx_pkt_burst = NULL;