net/ifc: support live migration
author Xiao Wang <xiao.w.wang@intel.com>
Mon, 10 Sep 2018 11:01:23 +0000 (19:01 +0800)
committer Ferruh Yigit <ferruh.yigit@intel.com>
Thu, 27 Sep 2018 23:41:02 +0000 (01:41 +0200)
IFCVF can help log dirty pages during the live migration stage.
Each queue's index can be read and configured to support
VHOST_USER_GET_VRING_BASE and VHOST_USER_SET_VRING_BASE.

Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
Reviewed-by: Xiaolong Ye <xiaolong.ye@intel.com>
Tested-by: Xiaolong Ye <xiaolong.ye@intel.com>
drivers/net/ifc/base/ifcvf.c
drivers/net/ifc/base/ifcvf.h
drivers/net/ifc/ifcvf_vdpa.c

index 4b22d9e..3c0b2df 100644 (file)
@@ -249,7 +249,7 @@ ifcvf_hw_disable(struct ifcvf_hw *hw)
                IFCVF_WRITE_REG16(IFCVF_MSI_NO_VECTOR, &cfg->queue_msix_vector);
                ring_state = *(u32 *)(hw->lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
                                (i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4);
-               hw->vring[i].last_avail_idx = (u16)ring_state;
+               hw->vring[i].last_avail_idx = (u16)(ring_state >> 16);
                hw->vring[i].last_used_idx = (u16)(ring_state >> 16);
        }
 }
@@ -278,6 +278,37 @@ ifcvf_stop_hw(struct ifcvf_hw *hw)
        ifcvf_reset(hw);
 }
 
+/*
+ * Program the dirty-page log window into the LM config region and write
+ * IFCVF_LM_ENABLE_VF to the logging control register.
+ *
+ * The window is described by its start (log_base) and its end
+ * (log_base + log_size), each written as a low and a high 32-bit half.
+ */
+void
+ifcvf_enable_logging(struct ifcvf_hw *hw, u64 log_base, u64 log_size)
+{
+       u8 *regs = hw->lm_cfg;
+       u64 log_end = log_base + log_size;
+
+       /* Start of the window: low half first, then high half. */
+       *(u32 *)(regs + IFCVF_LM_BASE_ADDR_LOW) = log_base & IFCVF_32_BIT_MASK;
+       *(u32 *)(regs + IFCVF_LM_BASE_ADDR_HIGH) =
+               (log_base >> 32) & IFCVF_32_BIT_MASK;
+
+       /* End of the window, in the same low/high order. */
+       *(u32 *)(regs + IFCVF_LM_END_ADDR_LOW) = log_end & IFCVF_32_BIT_MASK;
+       *(u32 *)(regs + IFCVF_LM_END_ADDR_HIGH) =
+               (log_end >> 32) & IFCVF_32_BIT_MASK;
+
+       /* The control write comes last, after both addresses are in place. */
+       *(u32 *)(regs + IFCVF_LM_LOGGING_CTRL) = IFCVF_LM_ENABLE_VF;
+}
+
+/* Write IFCVF_LM_DISABLE to the logging control register of the LM region. */
+void
+ifcvf_disable_logging(struct ifcvf_hw *hw)
+{
+       *(u32 *)(hw->lm_cfg + IFCVF_LM_LOGGING_CTRL) = IFCVF_LM_DISABLE;
+}
+
 void
 ifcvf_notify_queue(struct ifcvf_hw *hw, u16 qid)
 {
index badacb6..f026c70 100644 (file)
@@ -49,6 +49,7 @@
 #define IFCVF_LM_DISABLE               0x0
 #define IFCVF_LM_ENABLE_VF             0x1
 #define IFCVF_LM_ENABLE_PF             0x3
+#define IFCVF_LOG_BASE                 0x100000000000
 
 #define IFCVF_32_BIT_MASK              0xffffffff
 
@@ -142,6 +143,12 @@ ifcvf_start_hw(struct ifcvf_hw *hw);
 void
 ifcvf_stop_hw(struct ifcvf_hw *hw);
 
+void
+ifcvf_enable_logging(struct ifcvf_hw *hw, u64 log_base, u64 log_size);
+
+void
+ifcvf_disable_logging(struct ifcvf_hw *hw);
+
 void
 ifcvf_notify_queue(struct ifcvf_hw *hw, u16 qid);
 
index b9d0068..7d3085d 100644 (file)
@@ -7,6 +7,7 @@
 #include <fcntl.h>
 #include <sys/ioctl.h>
 #include <sys/epoll.h>
+#include <linux/virtio_net.h>
 
 #include <rte_malloc.h>
 #include <rte_memory.h>
@@ -276,12 +277,30 @@ vdpa_ifcvf_start(struct ifcvf_internal *internal)
        return ifcvf_start_hw(&internal->hw);
 }
 
+/*
+ * Mark every page touched by the used ring of @queue as dirty in @log_buf.
+ *
+ * @log_buf is indexed as a bitmap with one bit per page: page frame N is
+ * bit (N % 8) of byte (N / 8).
+ */
+static void
+ifcvf_used_ring_log(struct ifcvf_hw *hw, uint32_t queue, uint8_t *log_buf)
+{
+       uint64_t used, size, pfn, last_pfn;
+
+       used = hw->vring[queue].used;
+       /* Ring entries plus the three 16-bit fields that surround them. */
+       size = hw->vring[queue].size * sizeof(struct vring_used_elem) +
+                       sizeof(__virtio16) * 3;
+
+       /*
+        * The ring need not start on a page boundary, so the last page is
+        * derived from the ring's final byte rather than from its length.
+        */
+       last_pfn = (used + size - 1) / PAGE_SIZE;
+
+       for (pfn = used / PAGE_SIZE; pfn <= last_pfn; pfn++) {
+               /*
+                * One-byte atomic OR: the bitmap is addressed per byte, so a
+                * wider access would be misaligned and could run past the
+                * end of the buffer.
+                */
+               __sync_fetch_and_or_1(&log_buf[pfn / 8],
+                               (uint8_t)(1U << (pfn % 8)));
+       }
+}
+
 static void
 vdpa_ifcvf_stop(struct ifcvf_internal *internal)
 {
        struct ifcvf_hw *hw = &internal->hw;
        uint32_t i;
        int vid;
+       uint64_t features;
+       uint64_t log_base, log_size;
+       uint8_t *log_buf;
 
        vid = internal->vid;
        ifcvf_stop_hw(hw);
@@ -289,6 +308,21 @@ vdpa_ifcvf_stop(struct ifcvf_internal *internal)
        for (i = 0; i < hw->nr_vring; i++)
                rte_vhost_set_vring_base(vid, i, hw->vring[i].last_avail_idx,
                                hw->vring[i].last_used_idx);
+
+       rte_vhost_get_negotiated_features(vid, &features);
+       if (RTE_VHOST_NEED_LOG(features)) {
+               ifcvf_disable_logging(hw);
+               rte_vhost_get_log_base(internal->vid, &log_base, &log_size);
+               rte_vfio_container_dma_unmap(internal->vfio_container_fd,
+                               log_base, IFCVF_LOG_BASE, log_size);
+               /*
+                * IFCVF marks dirty memory pages for only packet buffer,
+                * SW helps to mark the used ring as dirty after device stops.
+                */
+               log_buf = (uint8_t *)(uintptr_t)log_base;
+               for (i = 0; i < hw->nr_vring; i++)
+                       ifcvf_used_ring_log(hw, i, log_buf);
+       }
 }
 
 #define MSIX_IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + \
@@ -548,6 +582,35 @@ ifcvf_dev_close(int vid)
        return 0;
 }
 
+/*
+ * vDPA set_features callback.
+ *
+ * When the negotiated features require dirty-page logging, map the vhost
+ * log buffer at IOVA IFCVF_LOG_BASE in the device's VFIO container and
+ * enable logging in the device.
+ *
+ * Returns 0 on success (including when logging is not needed), -1 if the
+ * device cannot be found or any of the vhost/VFIO calls fails. Logging is
+ * never enabled unless the log buffer has been mapped successfully.
+ */
+static int
+ifcvf_set_features(int vid)
+{
+       uint64_t features;
+       int did;
+       struct internal_list *list;
+       struct ifcvf_internal *internal;
+       uint64_t log_base, log_size;
+
+       did = rte_vhost_get_vdpa_device_id(vid);
+       list = find_internal_resource_by_did(did);
+       if (list == NULL) {
+               DRV_LOG(ERR, "Invalid device id: %d", did);
+               return -1;
+       }
+
+       internal = list->internal;
+       if (rte_vhost_get_negotiated_features(vid, &features) != 0) {
+               DRV_LOG(ERR, "Failed to get negotiated features, vid: %d", vid);
+               return -1;
+       }
+
+       if (!RTE_VHOST_NEED_LOG(features))
+               return 0;
+
+       /* log_base and log_size are only valid when this call succeeds. */
+       if (rte_vhost_get_log_base(vid, &log_base, &log_size) != 0) {
+               DRV_LOG(ERR, "Failed to get log base, vid: %d", vid);
+               return -1;
+       }
+
+       /* Do not let the device write to an IOVA that was never mapped. */
+       if (rte_vfio_container_dma_map(internal->vfio_container_fd,
+                       log_base, IFCVF_LOG_BASE, log_size) != 0) {
+               DRV_LOG(ERR, "Failed to DMA map log buffer, vid: %d", vid);
+               return -1;
+       }
+
+       ifcvf_enable_logging(&internal->hw, IFCVF_LOG_BASE, log_size);
+
+       return 0;
+}
+
 static int
 ifcvf_get_vfio_group_fd(int vid)
 {
@@ -664,7 +727,7 @@ struct rte_vdpa_dev_ops ifcvf_ops = {
        .dev_conf = ifcvf_dev_config,
        .dev_close = ifcvf_dev_close,
        .set_vring_state = NULL,
-       .set_features = NULL,
+       .set_features = ifcvf_set_features,
        .migration_done = NULL,
        .get_vfio_group_fd = ifcvf_get_vfio_group_fd,
        .get_vfio_device_fd = ifcvf_get_vfio_device_fd,
@@ -699,7 +762,11 @@ ifcvf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
        features = ifcvf_get_features(&internal->hw);
        internal->features = (features &
                ~(1ULL << VIRTIO_F_IOMMU_PLATFORM)) |
-               (1ULL << VHOST_USER_F_PROTOCOL_FEATURES);
+               (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
+               (1ULL << VIRTIO_NET_F_CTRL_VQ) |
+               (1ULL << VIRTIO_NET_F_STATUS) |
+               (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
+               (1ULL << VHOST_F_LOG_ALL);
 
        internal->dev_addr.pci_addr = pci_dev->addr;
        internal->dev_addr.type = PCI_ADDR;