vdpa/ifc: fix build with GCC 12
[dpdk.git] / drivers / vdpa / ifc / ifcvf_vdpa.c
index f81d13a..8bc971c 100644 (file)
 #include <linux/virtio_net.h>
 #include <stdbool.h>
 
+#include <rte_eal_paging.h>
 #include <rte_malloc.h>
 #include <rte_memory.h>
 #include <rte_bus_pci.h>
 #include <rte_vhost.h>
 #include <rte_vdpa.h>
-#include <rte_vdpa_dev.h>
+#include <vdpa_driver.h>
 #include <rte_vfio.h>
 #include <rte_spinlock.h>
 #include <rte_log.h>
 
 #include "base/ifcvf.h"
 
+RTE_LOG_REGISTER(ifcvf_vdpa_logtype, pmd.vdpa.ifcvf, NOTICE);
 #define DRV_LOG(level, fmt, args...) \
        rte_log(RTE_LOG_ ## level, ifcvf_vdpa_logtype, \
                "IFCVF %s(): " fmt "\n", __func__, ##args)
 
-#ifndef PAGE_SIZE
-#define PAGE_SIZE 4096
-#endif
-
 #define IFCVF_USED_RING_LEN(size) \
        ((size) * sizeof(struct vring_used_elem) + sizeof(uint16_t) * 3)
 
 #define IFCVF_VDPA_MODE                "vdpa"
 #define IFCVF_SW_FALLBACK_LM   "sw-live-migration"
 
+#define THREAD_NAME_LEN        16
+
 static const char * const ifcvf_valid_arguments[] = {
        IFCVF_VDPA_MODE,
        IFCVF_SW_FALLBACK_LM,
        NULL
 };
 
-static int ifcvf_vdpa_logtype;
-
 struct ifcvf_internal {
        struct rte_pci_device *pdev;
        struct ifcvf_hw hw;
+       int configured;
        int vfio_container_fd;
        int vfio_group_fd;
        int vfio_dev_fd;
        pthread_t tid;  /* thread for notify relay */
+       pthread_t intr_tid; /* thread for config space change interrupt relay */
        int epfd;
+       int csc_epfd;
        int vid;
        struct rte_vdpa_device *vdev;
        uint16_t max_queues;
@@ -76,6 +77,12 @@ struct internal_list {
        struct ifcvf_internal *internal;
 };
 
+/* vdpa device info includes device features and device operation. */
+struct rte_vdpa_dev_info {
+       uint64_t features;
+       struct rte_vdpa_dev_ops *ops;
+};
+
 TAILQ_HEAD(internal_list_head, internal_list);
 static struct internal_list_head internal_list =
        TAILQ_HEAD_INITIALIZER(internal_list);
@@ -163,7 +170,7 @@ ifcvf_vfio_setup(struct ifcvf_internal *internal)
        if (rte_pci_map_device(dev))
                goto err;
 
-       internal->vfio_dev_fd = dev->intr_handle.vfio_dev_fd;
+       internal->vfio_dev_fd = rte_intr_dev_fd_get(dev->intr_handle);
 
        for (i = 0; i < RTE_MIN(PCI_MAX_RESOURCE, IFCVF_PCI_MAX_RESOURCE);
                        i++) {
@@ -183,7 +190,7 @@ err:
 }
 
 static int
-ifcvf_dma_map(struct ifcvf_internal *internal, int do_map)
+ifcvf_dma_map(struct ifcvf_internal *internal, bool do_map)
 {
        uint32_t i;
        int ret;
@@ -227,8 +234,7 @@ ifcvf_dma_map(struct ifcvf_internal *internal, int do_map)
        }
 
 exit:
-       if (mem)
-               free(mem);
+       free(mem);
        return ret;
 }
 
@@ -254,8 +260,7 @@ hva_to_gpa(int vid, uint64_t hva)
        }
 
 exit:
-       if (mem)
-               free(mem);
+       free(mem);
        return gpa;
 }
 
@@ -313,8 +318,34 @@ vdpa_ifcvf_stop(struct ifcvf_internal *internal)
        uint64_t features = 0;
        uint64_t log_base = 0, log_size = 0;
        uint64_t len;
+       u32 ring_state = 0;
 
        vid = internal->vid;
+
+       /* to make sure no packet is lost for blk device
+        * do not stop until last_avail_idx == last_used_idx
+        */
+       if (internal->hw.device_type == IFCVF_BLK) {
+               for (i = 0; i < hw->nr_vring; i++) {
+                       do {
+                               if (hw->lm_cfg != NULL)
+                                       ring_state = *(u32 *)(hw->lm_cfg +
+                                               IFCVF_LM_RING_STATE_OFFSET +
+                                               i * IFCVF_LM_CFG_SIZE);
+                               hw->vring[i].last_avail_idx =
+                                       (u16)(ring_state & IFCVF_16_BIT_MASK);
+                               hw->vring[i].last_used_idx =
+                                       (u16)(ring_state >> 16);
+                               if (hw->vring[i].last_avail_idx !=
+                                       hw->vring[i].last_used_idx) {
+                                       ifcvf_notify_queue(hw, i);
+                                       usleep(10);
+                               }
+                       } while (hw->vring[i].last_avail_idx !=
+                               hw->vring[i].last_used_idx);
+               }
+       }
+
        ifcvf_stop_hw(hw);
 
        for (i = 0; i < hw->nr_vring; i++)
@@ -357,6 +388,8 @@ vdpa_enable_vfio_intr(struct ifcvf_internal *internal, bool m_rx)
        vring.callfd = -1;
 
        nr_vring = rte_vhost_get_vring_num(internal->vid);
+       if (nr_vring > IFCVF_MAX_QUEUES * 2)
+               return -1;
 
        irq_set = (struct vfio_irq_set *)irq_set_buf;
        irq_set->argsz = sizeof(irq_set_buf);
@@ -366,7 +399,9 @@ vdpa_enable_vfio_intr(struct ifcvf_internal *internal, bool m_rx)
        irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
        irq_set->start = 0;
        fd_ptr = (int *)&irq_set->data;
-       fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] = internal->pdev->intr_handle.fd;
+       /* The first interrupt is for the config space change notification */
+       fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] =
+               rte_intr_fd_get(internal->pdev->intr_handle);
 
        for (i = 0; i < nr_vring; i++)
                internal->intr_fd[i] = -1;
@@ -374,7 +409,13 @@ vdpa_enable_vfio_intr(struct ifcvf_internal *internal, bool m_rx)
        for (i = 0; i < nr_vring; i++) {
                rte_vhost_get_vhost_vring(internal->vid, i, &vring);
                fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
-               if ((i & 1) == 0 && m_rx == true) {
+               if (m_rx == true &&
+                       ((i & 1) == 0 || internal->hw.device_type == IFCVF_BLK)) {
+                       /* For the net we only need to relay rx queue,
+                        * which will change the mem of VM.
+                        * For the blk we need to relay all the read cmd
+                        * of each queue
+                        */
                        fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
                        if (fd < 0) {
                                DRV_LOG(ERR, "can't setup eventfd: %s",
@@ -497,14 +538,17 @@ notify_relay(void *arg)
 static int
 setup_notify_relay(struct ifcvf_internal *internal)
 {
+       char name[THREAD_NAME_LEN];
        int ret;
 
-       ret = pthread_create(&internal->tid, NULL, notify_relay,
-                       (void *)internal);
-       if (ret) {
+       snprintf(name, sizeof(name), "ifc-notify-%d", internal->vid);
+       ret = rte_ctrl_thread_create(&internal->tid, name, NULL, notify_relay,
+                                    (void *)internal);
+       if (ret != 0) {
                DRV_LOG(ERR, "failed to create notify relay pthread.");
                return -1;
        }
+
        return 0;
 }
 
@@ -526,6 +570,114 @@ unset_notify_relay(struct ifcvf_internal *internal)
        return 0;
 }
 
+static void
+virtio_interrupt_handler(struct ifcvf_internal *internal)
+{
+       int vid = internal->vid;
+       int ret;
+
+       ret = rte_vhost_slave_config_change(vid, 1);
+       if (ret)
+               DRV_LOG(ERR, "failed to notify the guest about configuration space change.");
+}
+
+static void *
+intr_relay(void *arg)
+{
+       struct ifcvf_internal *internal = (struct ifcvf_internal *)arg;
+       struct epoll_event csc_event;
+       struct epoll_event ev;
+       uint64_t buf;
+       int nbytes;
+       int csc_epfd, csc_val = 0;
+
+       csc_epfd = epoll_create(1);
+       if (csc_epfd < 0) {
+               DRV_LOG(ERR, "failed to create epoll for config space change.");
+               return NULL;
+       }
+
+       ev.events = EPOLLIN | EPOLLPRI | EPOLLRDHUP | EPOLLHUP;
+       ev.data.fd = rte_intr_fd_get(internal->pdev->intr_handle);
+       if (epoll_ctl(csc_epfd, EPOLL_CTL_ADD,
+               rte_intr_fd_get(internal->pdev->intr_handle), &ev) < 0) {
+               DRV_LOG(ERR, "epoll add error: %s", strerror(errno));
+               goto out;
+       }
+
+       internal->csc_epfd = csc_epfd;
+
+       for (;;) {
+               csc_val = epoll_wait(csc_epfd, &csc_event, 1, -1);
+               if (csc_val < 0) {
+                       if (errno == EINTR)
+                               continue;
+                       DRV_LOG(ERR, "epoll_wait return fail.");
+                       goto out;
+               } else if (csc_val == 0) {
+                       continue;
+               } else {
+                       /* csc_val > 0 */
+                       nbytes = read(csc_event.data.fd, &buf, 8);
+                       if (nbytes < 0) {
+                               if (errno == EINTR ||
+                                   errno == EWOULDBLOCK ||
+                                   errno == EAGAIN)
+                                       continue;
+                               DRV_LOG(ERR, "Error reading from file descriptor %d: %s\n",
+                                       csc_event.data.fd,
+                                       strerror(errno));
+                               goto out;
+                       } else if (nbytes == 0) {
+                               DRV_LOG(ERR, "Read nothing from file descriptor %d\n",
+                                       csc_event.data.fd);
+                               continue;
+                       } else {
+                               virtio_interrupt_handler(internal);
+                       }
+               }
+       }
+
+out:
+       if (csc_epfd >= 0)
+               close(csc_epfd);
+       internal->csc_epfd = -1;
+
+       return NULL;
+}
+
+static int
+setup_intr_relay(struct ifcvf_internal *internal)
+{
+       char name[THREAD_NAME_LEN];
+       int ret;
+
+       snprintf(name, sizeof(name), "ifc-intr-%d", internal->vid);
+       ret = rte_ctrl_thread_create(&internal->intr_tid, name, NULL,
+                                    intr_relay, (void *)internal);
+       if (ret) {
+               DRV_LOG(ERR, "failed to create notify relay pthread.");
+               return -1;
+       }
+       return 0;
+}
+
+static void
+unset_intr_relay(struct ifcvf_internal *internal)
+{
+       void *status;
+
+       if (internal->intr_tid) {
+               pthread_cancel(internal->intr_tid);
+               pthread_join(internal->intr_tid, &status);
+       }
+       internal->intr_tid = 0;
+
+       if (internal->csc_epfd >= 0)
+               close(internal->csc_epfd);
+       internal->csc_epfd = -1;
+}
+
 static int
 update_datapath(struct ifcvf_internal *internal)
 {
@@ -536,11 +688,11 @@ update_datapath(struct ifcvf_internal *internal)
        if (!rte_atomic32_read(&internal->running) &&
            (rte_atomic32_read(&internal->started) &&
             rte_atomic32_read(&internal->dev_attached))) {
-               ret = ifcvf_dma_map(internal, 1);
+               ret = ifcvf_dma_map(internal, true);
                if (ret)
                        goto err;
 
-               ret = vdpa_enable_vfio_intr(internal, 0);
+               ret = vdpa_enable_vfio_intr(internal, false);
                if (ret)
                        goto err;
 
@@ -552,10 +704,16 @@ update_datapath(struct ifcvf_internal *internal)
                if (ret)
                        goto err;
 
+               ret = setup_intr_relay(internal);
+               if (ret)
+                       goto err;
+
                rte_atomic32_set(&internal->running, 1);
        } else if (rte_atomic32_read(&internal->running) &&
                   (!rte_atomic32_read(&internal->started) ||
                    !rte_atomic32_read(&internal->dev_attached))) {
+               unset_intr_relay(internal);
+
                ret = unset_notify_relay(internal);
                if (ret)
                        goto err;
@@ -566,7 +724,7 @@ update_datapath(struct ifcvf_internal *internal)
                if (ret)
                        goto err;
 
-               ret = ifcvf_dma_map(internal, 0);
+               ret = ifcvf_dma_map(internal, false);
                if (ret)
                        goto err;
 
@@ -600,11 +758,11 @@ m_ifcvf_start(struct ifcvf_internal *internal)
        for (i = 0; i < nr_vring; i++) {
                rte_vhost_get_vhost_vring(vid, i, &vq);
 
-               size = RTE_ALIGN_CEIL(vring_size(vq.size, PAGE_SIZE),
-                               PAGE_SIZE);
-               vring_buf = rte_zmalloc("ifcvf", size, PAGE_SIZE);
+               size = RTE_ALIGN_CEIL(vring_size(vq.size, rte_mem_page_size()),
+                               rte_mem_page_size());
+               vring_buf = rte_zmalloc("ifcvf", size, rte_mem_page_size());
                vring_init(&internal->m_vring[i], vq.size, vring_buf,
-                               PAGE_SIZE);
+                               rte_mem_page_size());
 
                ret = rte_vfio_container_dma_map(internal->vfio_container_fd,
                        (uint64_t)(uintptr_t)vring_buf, m_vring_iova, size);
@@ -627,8 +785,10 @@ m_ifcvf_start(struct ifcvf_internal *internal)
                }
                hw->vring[i].avail = gpa;
 
-               /* Direct I/O for Tx queue, relay for Rx queue */
-               if (i & 1) {
+               /* NET: Direct I/O for Tx queue, relay for Rx queue
+                * BLK: relay every queue
+                */
+               if ((internal->hw.device_type == IFCVF_NET) && (i & 1)) {
                        gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.used);
                        if (gpa == 0) {
                                DRV_LOG(ERR, "Fail to get GPA for used ring.");
@@ -658,8 +818,7 @@ m_ifcvf_start(struct ifcvf_internal *internal)
 
 error:
        for (i = 0; i < nr_vring; i++)
-               if (internal->m_vring[i].desc)
-                       rte_free(internal->m_vring[i].desc);
+               rte_free(internal->m_vring[i].desc);
 
        return -1;
 }
@@ -679,15 +838,19 @@ m_ifcvf_stop(struct ifcvf_internal *internal)
 
        for (i = 0; i < hw->nr_vring; i++) {
                /* synchronize remaining new used entries if any */
-               if ((i & 1) == 0)
+               if (internal->hw.device_type == IFCVF_NET) {
+                       if ((i & 1) == 0)
+                               update_used_ring(internal, i);
+               } else if (internal->hw.device_type == IFCVF_BLK) {
                        update_used_ring(internal, i);
+               }
 
                rte_vhost_get_vhost_vring(vid, i, &vq);
                len = IFCVF_USED_RING_LEN(vq.size);
                rte_vhost_log_used_vring(vid, i, 0, len);
 
-               size = RTE_ALIGN_CEIL(vring_size(vq.size, PAGE_SIZE),
-                               PAGE_SIZE);
+               size = RTE_ALIGN_CEIL(vring_size(vq.size, rte_mem_page_size()),
+                               rte_mem_page_size());
                rte_vfio_container_dma_unmap(internal->vfio_container_fd,
                        (uint64_t)(uintptr_t)internal->m_vring[i].desc,
                        m_vring_iova, size);
@@ -742,7 +905,9 @@ vring_relay(void *arg)
                }
        }
 
-       for (qid = 0; qid < q_num; qid += 2) {
+       for (qid = 0; qid < q_num; qid += 1) {
+               if ((internal->hw.device_type == IFCVF_NET) && (qid & 1))
+                       continue;
                ev.events = EPOLLIN | EPOLLPRI;
                /* leave a flag to mark it's for interrupt */
                ev.data.u64 = 1 | qid << 1 |
@@ -765,7 +930,7 @@ vring_relay(void *arg)
                if (nfds < 0) {
                        if (errno == EINTR)
                                continue;
-                       DRV_LOG(ERR, "epoll_wait return fail\n");
+                       DRV_LOG(ERR, "epoll_wait return fail.");
                        return NULL;
                }
 
@@ -800,14 +965,17 @@ vring_relay(void *arg)
 static int
 setup_vring_relay(struct ifcvf_internal *internal)
 {
+       char name[THREAD_NAME_LEN];
        int ret;
 
-       ret = pthread_create(&internal->tid, NULL, vring_relay,
-                       (void *)internal);
-       if (ret) {
+       snprintf(name, sizeof(name), "ifc-vring-%d", internal->vid);
+       ret = rte_ctrl_thread_create(&internal->tid, name, NULL, vring_relay,
+                                    (void *)internal);
+       if (ret != 0) {
                DRV_LOG(ERR, "failed to create ring relay pthread.");
                return -1;
        }
+
        return 0;
 }
 
@@ -838,14 +1006,17 @@ ifcvf_sw_fallback_switchover(struct ifcvf_internal *internal)
        /* stop the direct IO data path */
        unset_notify_relay(internal);
        vdpa_ifcvf_stop(internal);
+
+       unset_intr_relay(internal);
+
        vdpa_disable_vfio_intr(internal);
 
-       ret = rte_vhost_host_notifier_ctrl(vid, false);
+       ret = rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, false);
        if (ret && ret != -ENOTSUP)
                goto error;
 
        /* set up interrupt for interrupt relay */
-       ret = vdpa_enable_vfio_intr(internal, 1);
+       ret = vdpa_enable_vfio_intr(internal, true);
        if (ret)
                goto unmap;
 
@@ -859,7 +1030,7 @@ ifcvf_sw_fallback_switchover(struct ifcvf_internal *internal)
        if (ret)
                goto stop_vf;
 
-       rte_vhost_host_notifier_ctrl(vid, true);
+       rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, true);
 
        internal->sw_fallback_running = true;
 
@@ -870,7 +1041,7 @@ stop_vf:
 unset_intr:
        vdpa_disable_vfio_intr(internal);
 unmap:
-       ifcvf_dma_map(internal, 0);
+       ifcvf_dma_map(internal, false);
 error:
        return -1;
 }
@@ -894,10 +1065,11 @@ ifcvf_dev_config(int vid)
        rte_atomic32_set(&internal->dev_attached, 1);
        update_datapath(internal);
 
-       if (rte_vhost_host_notifier_ctrl(vid, true) != 0)
+       if (rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, true) != 0)
                DRV_LOG(NOTICE, "vDPA (%s): software relay is used.",
                                vdev->device->name);
 
+       internal->configured = 1;
        return 0;
 }
 
@@ -928,7 +1100,7 @@ ifcvf_dev_close(int vid)
                vdpa_disable_vfio_intr(internal);
 
                /* unset DMA map for guest memory */
-               ifcvf_dma_map(internal, 0);
+               ifcvf_dma_map(internal, false);
 
                internal->sw_fallback_running = false;
        } else {
@@ -936,6 +1108,7 @@ ifcvf_dev_close(int vid)
                update_datapath(internal);
        }
 
+       internal->configured = 0;
        return 0;
 }
 
@@ -1074,7 +1247,12 @@ ifcvf_get_vdpa_features(struct rte_vdpa_device *vdev, uint64_t *features)
                 1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ | \
                 1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD | \
                 1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER | \
-                1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD)
+                1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD | \
+                1ULL << VHOST_USER_PROTOCOL_F_STATUS)
+
+#define VDPA_BLK_PROTOCOL_FEATURES \
+               (1ULL << VHOST_USER_PROTOCOL_F_CONFIG)
+
 static int
 ifcvf_get_protocol_features(struct rte_vdpa_device *vdev, uint64_t *features)
 {
@@ -1084,18 +1262,90 @@ ifcvf_get_protocol_features(struct rte_vdpa_device *vdev, uint64_t *features)
        return 0;
 }
 
-static struct rte_vdpa_dev_ops ifcvf_ops = {
+static int
+ifcvf_set_vring_state(int vid, int vring, int state)
+{
+       struct rte_vdpa_device *vdev;
+       struct internal_list *list;
+       struct ifcvf_internal *internal;
+       struct ifcvf_hw *hw;
+       struct ifcvf_pci_common_cfg *cfg;
+       int ret = 0;
+
+       vdev = rte_vhost_get_vdpa_device(vid);
+       list = find_internal_resource_by_vdev(vdev);
+       if (list == NULL) {
+               DRV_LOG(ERR, "Invalid vDPA device: %p", vdev);
+               return -1;
+       }
+
+       internal = list->internal;
+       if (vring < 0 || vring >= internal->max_queues * 2) {
+               DRV_LOG(ERR, "Vring index %d not correct", vring);
+               return -1;
+       }
+
+       hw = &internal->hw;
+       if (!internal->configured)
+               goto exit;
+
+       cfg = hw->common_cfg;
+       IFCVF_WRITE_REG16(vring, &cfg->queue_select);
+       IFCVF_WRITE_REG16(!!state, &cfg->queue_enable);
+
+       if (!state && hw->vring[vring].enable) {
+               ret = vdpa_disable_vfio_intr(internal);
+               if (ret)
+                       return ret;
+       }
+
+       if (state && !hw->vring[vring].enable) {
+               ret = vdpa_enable_vfio_intr(internal, false);
+               if (ret)
+                       return ret;
+       }
+
+exit:
+       hw->vring[vring].enable = !!state;
+       return 0;
+}
+
+static int
+ifcvf_get_device_type(struct rte_vdpa_device *vdev,
+       uint32_t *type)
+{
+       struct ifcvf_internal *internal;
+       struct internal_list *list;
+
+       list = find_internal_resource_by_vdev(vdev);
+       if (list == NULL) {
+               DRV_LOG(ERR, "Invalid vDPA device: %p", vdev);
+               return -1;
+       }
+
+       internal = list->internal;
+
+       if (internal->hw.device_type == IFCVF_BLK)
+               *type = RTE_VHOST_VDPA_DEVICE_TYPE_BLK;
+       else
+               *type = RTE_VHOST_VDPA_DEVICE_TYPE_NET;
+
+       return 0;
+}
+
+static struct rte_vdpa_dev_ops ifcvf_net_ops = {
        .get_queue_num = ifcvf_get_queue_num,
        .get_features = ifcvf_get_vdpa_features,
        .get_protocol_features = ifcvf_get_protocol_features,
        .dev_conf = ifcvf_dev_config,
        .dev_close = ifcvf_dev_close,
-       .set_vring_state = NULL,
+       .set_vring_state = ifcvf_set_vring_state,
        .set_features = ifcvf_set_features,
        .migration_done = NULL,
        .get_vfio_group_fd = ifcvf_get_vfio_group_fd,
        .get_vfio_device_fd = ifcvf_get_vfio_device_fd,
        .get_notify_area = ifcvf_get_notify_area,
+       .get_dev_type = ifcvf_get_device_type,
 };
 
 static inline int
@@ -1113,6 +1363,139 @@ open_int(const char *key __rte_unused, const char *value, void *extra_args)
        return 0;
 }
 
+static int16_t
+ifcvf_pci_get_device_type(struct rte_pci_device *pci_dev)
+{
+       uint16_t pci_device_id = pci_dev->id.device_id;
+       uint16_t device_id;
+
+       if (pci_device_id < 0x1000 || pci_device_id > 0x107f) {
+               DRV_LOG(ERR, "Probe device is not a virtio device\n");
+               return -1;
+       }
+
+       if (pci_device_id < 0x1040) {
+               /* Transitional devices: use the PCI subsystem device id as
+                * virtio device id, same as legacy driver always did.
+                */
+               device_id = pci_dev->id.subsystem_device_id;
+       } else {
+               /* Modern devices: simply use PCI device id,
+                * but start from 0x1040.
+                */
+               device_id = pci_device_id - 0x1040;
+       }
+
+       return device_id;
+}
+
+static int
+ifcvf_blk_get_config(int vid, uint8_t *config, uint32_t size)
+{
+       struct virtio_blk_config *dev_cfg;
+       struct ifcvf_internal *internal;
+       struct rte_vdpa_device *vdev;
+       struct internal_list *list;
+       uint32_t i;
+       uint64_t capacity = 0;
+       uint8_t *byte;
+
+       if (size != sizeof(struct virtio_blk_config)) {
+               DRV_LOG(ERR, "Invalid len: %u, required: %u",
+                       size, (uint32_t)sizeof(struct virtio_blk_config));
+               return -1;
+       }
+
+       vdev = rte_vhost_get_vdpa_device(vid);
+       if (vdev == NULL) {
+               DRV_LOG(ERR, "Invalid vDPA device vid: %d", vid);
+               return -1;
+       }
+
+       list = find_internal_resource_by_vdev(vdev);
+       if (list == NULL) {
+               DRV_LOG(ERR, "Invalid vDPA device: %p", vdev);
+               return -1;
+       }
+
+       internal = list->internal;
+
+       for (i = 0; i < sizeof(struct virtio_blk_config); i++)
+               config[i] = *((u8 *)internal->hw.blk_cfg + i);
+
+       dev_cfg = (struct virtio_blk_config *)internal->hw.blk_cfg;
+
+       /* cannot read 64-bit register in one attempt, so read byte by byte. */
+       for (i = 0; i < sizeof(internal->hw.blk_cfg->capacity); i++) {
+               byte = (uint8_t *)&internal->hw.blk_cfg->capacity + i;
+               capacity |= (uint64_t)*byte << (i * 8);
+       }
+       /* The capacity is number of sectors in 512-byte.
+        * So right shift 1 bit  we get in K,
+        * another right shift 10 bits we get in M,
+        * right shift 10 more bits, we get in G.
+        * To show capacity in G, we right shift 21 bits in total.
+        */
+       DRV_LOG(DEBUG, "capacity  : %"PRIu64"G", capacity >> 21);
+
+       DRV_LOG(DEBUG, "size_max  : 0x%08x", dev_cfg->size_max);
+       DRV_LOG(DEBUG, "seg_max   : 0x%08x", dev_cfg->seg_max);
+       DRV_LOG(DEBUG, "blk_size  : 0x%08x", dev_cfg->blk_size);
+       DRV_LOG(DEBUG, "geometry");
+       DRV_LOG(DEBUG, "      cylinders: %u", dev_cfg->geometry.cylinders);
+       DRV_LOG(DEBUG, "      heads    : %u", dev_cfg->geometry.heads);
+       DRV_LOG(DEBUG, "      sectors  : %u", dev_cfg->geometry.sectors);
+       DRV_LOG(DEBUG, "num_queues: 0x%08x", dev_cfg->num_queues);
+
+       DRV_LOG(DEBUG, "config: [%x] [%x] [%x] [%x] [%x] [%x] [%x] [%x]\n",
+               config[0], config[1], config[2], config[3], config[4],
+               config[5], config[6], config[7]);
+       return 0;
+}
+
+static int
+ifcvf_blk_get_protocol_features(struct rte_vdpa_device *vdev,
+       uint64_t *features)
+{
+       RTE_SET_USED(vdev);
+
+       *features = VDPA_SUPPORTED_PROTOCOL_FEATURES;
+       *features |= VDPA_BLK_PROTOCOL_FEATURES;
+       return 0;
+}
+
+static struct rte_vdpa_dev_ops ifcvf_blk_ops = {
+       .get_queue_num = ifcvf_get_queue_num,
+       .get_features = ifcvf_get_vdpa_features,
+       .set_features = ifcvf_set_features,
+       .get_protocol_features = ifcvf_blk_get_protocol_features,
+       .dev_conf = ifcvf_dev_config,
+       .dev_close = ifcvf_dev_close,
+       .set_vring_state = ifcvf_set_vring_state,
+       .migration_done = NULL,
+       .get_vfio_group_fd = ifcvf_get_vfio_group_fd,
+       .get_vfio_device_fd = ifcvf_get_vfio_device_fd,
+       .get_notify_area = ifcvf_get_notify_area,
+       .get_config = ifcvf_blk_get_config,
+       .get_dev_type = ifcvf_get_device_type,
+};
+
+struct rte_vdpa_dev_info dev_info[] = {
+       {
+               .features = (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
+                           (1ULL << VIRTIO_NET_F_CTRL_VQ) |
+                           (1ULL << VIRTIO_NET_F_STATUS) |
+                           (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
+                           (1ULL << VHOST_F_LOG_ALL),
+               .ops = &ifcvf_net_ops,
+       },
+       {
+               .features = (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
+                           (1ULL << VHOST_F_LOG_ALL),
+               .ops = &ifcvf_blk_ops,
+       },
+};
+
 static int
 ifcvf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
                struct rte_pci_device *pci_dev)
@@ -1124,6 +1507,10 @@ ifcvf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
        int sw_fallback_lm = 0;
        struct rte_kvargs *kvlist = NULL;
        int ret = 0;
+       int16_t device_id;
+       uint64_t capacity = 0;
+       uint8_t *byte;
+       uint32_t i;
 
        if (rte_eal_process_type() != RTE_PROC_PRIMARY)
                return 0;
@@ -1170,15 +1557,58 @@ ifcvf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
                goto error;
        }
 
+       internal->configured = 0;
        internal->max_queues = IFCVF_MAX_QUEUES;
        features = ifcvf_get_features(&internal->hw);
-       internal->features = (features &
-               ~(1ULL << VIRTIO_F_IOMMU_PLATFORM)) |
-               (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
-               (1ULL << VIRTIO_NET_F_CTRL_VQ) |
-               (1ULL << VIRTIO_NET_F_STATUS) |
-               (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
-               (1ULL << VHOST_F_LOG_ALL);
+
+       device_id = ifcvf_pci_get_device_type(pci_dev);
+       if (device_id < 0) {
+               DRV_LOG(ERR, "failed to get device %s type", pci_dev->name);
+               goto error;
+       }
+
+       if (device_id == VIRTIO_ID_NET) {
+               internal->hw.device_type = IFCVF_NET;
+               internal->features = features &
+                                       ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
+               internal->features |= dev_info[IFCVF_NET].features;
+       } else if (device_id == VIRTIO_ID_BLOCK) {
+               internal->hw.device_type = IFCVF_BLK;
+               internal->features = features &
+                                       ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
+               internal->features |= dev_info[IFCVF_BLK].features;
+
+               /* cannot read 64-bit register in one attempt,
+                * so read byte by byte.
+                */
+               for (i = 0; i < sizeof(internal->hw.blk_cfg->capacity); i++) {
+                       byte = (uint8_t *)&internal->hw.blk_cfg->capacity + i;
+                       capacity |= (uint64_t)*byte << (i * 8);
+               }
+               /* The capacity is number of sectors in 512-byte.
+                * So right shift 1 bit  we get in K,
+                * another right shift 10 bits we get in M,
+                * right shift 10 more bits, we get in G.
+                * To show capacity in G, we right shift 21 bits in total.
+                */
+               DRV_LOG(DEBUG, "capacity  : %"PRIu64"G", capacity >> 21);
+
+               DRV_LOG(DEBUG, "size_max  : 0x%08x",
+                       internal->hw.blk_cfg->size_max);
+               DRV_LOG(DEBUG, "seg_max   : 0x%08x",
+                       internal->hw.blk_cfg->seg_max);
+               DRV_LOG(DEBUG, "blk_size  : 0x%08x",
+                       internal->hw.blk_cfg->blk_size);
+               DRV_LOG(DEBUG, "geometry");
+               DRV_LOG(DEBUG, "    cylinders: %u",
+                       internal->hw.blk_cfg->geometry.cylinders);
+               DRV_LOG(DEBUG, "    heads    : %u",
+                       internal->hw.blk_cfg->geometry.heads);
+               DRV_LOG(DEBUG, "    sectors  : %u",
+                       internal->hw.blk_cfg->geometry.sectors);
+               DRV_LOG(DEBUG, "num_queues: 0x%08x",
+                       internal->hw.blk_cfg->num_queues);
+       }
 
        list->internal = internal;
 
@@ -1190,7 +1620,8 @@ ifcvf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
        }
        internal->sw_lm = sw_fallback_lm;
 
-       internal->vdev = rte_vdpa_register_device(&pci_dev->device, &ifcvf_ops);
+       internal->vdev = rte_vdpa_register_device(&pci_dev->device,
+                               dev_info[internal->hw.device_type].ops);
        if (internal->vdev == NULL) {
                DRV_LOG(ERR, "failed to register device %s", pci_dev->name);
                goto error;
@@ -1253,11 +1684,25 @@ ifcvf_pci_remove(struct rte_pci_device *pci_dev)
 static const struct rte_pci_id pci_id_ifcvf_map[] = {
        { .class_id = RTE_CLASS_ANY_ID,
          .vendor_id = IFCVF_VENDOR_ID,
-         .device_id = IFCVF_DEVICE_ID,
+         .device_id = IFCVF_NET_DEVICE_ID,
          .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
          .subsystem_device_id = IFCVF_SUBSYS_DEVICE_ID,
        },
 
+       { .class_id = RTE_CLASS_ANY_ID,
+         .vendor_id = IFCVF_VENDOR_ID,
+         .device_id = IFCVF_BLK_TRANSITIONAL_DEVICE_ID,
+         .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
+         .subsystem_device_id = IFCVF_BLK_DEVICE_ID,
+       },
+
+       { .class_id = RTE_CLASS_ANY_ID,
+         .vendor_id = IFCVF_VENDOR_ID,
+         .device_id = IFCVF_BLK_MODERN_DEVICE_ID,
+         .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
+         .subsystem_device_id = IFCVF_BLK_DEVICE_ID,
+       },
+
        { .vendor_id = 0, /* sentinel */
        },
 };
@@ -1272,10 +1717,3 @@ static struct rte_pci_driver rte_ifcvf_vdpa = {
 RTE_PMD_REGISTER_PCI(net_ifcvf, rte_ifcvf_vdpa);
 RTE_PMD_REGISTER_PCI_TABLE(net_ifcvf, pci_id_ifcvf_map);
 RTE_PMD_REGISTER_KMOD_DEP(net_ifcvf, "* vfio-pci");
-
-RTE_INIT(ifcvf_vdpa_init_log)
-{
-       ifcvf_vdpa_logtype = rte_log_register("pmd.net.ifcvf_vdpa");
-       if (ifcvf_vdpa_logtype >= 0)
-               rte_log_set_level(ifcvf_vdpa_logtype, RTE_LOG_NOTICE);
-}