vdpa/mlx5: support device cleanup callback
[dpdk.git] / drivers / vdpa / ifc / ifcvf_vdpa.c
index da4667b..9f05595 100644 (file)
 #include <linux/virtio_net.h>
 #include <stdbool.h>
 
+#include <rte_eal_paging.h>
 #include <rte_malloc.h>
 #include <rte_memory.h>
 #include <rte_bus_pci.h>
 #include <rte_vhost.h>
 #include <rte_vdpa.h>
+#include <vdpa_driver.h>
 #include <rte_vfio.h>
 #include <rte_spinlock.h>
 #include <rte_log.h>
 
 #include "base/ifcvf.h"
 
+RTE_LOG_REGISTER(ifcvf_vdpa_logtype, pmd.vdpa.ifcvf, NOTICE);
 #define DRV_LOG(level, fmt, args...) \
        rte_log(RTE_LOG_ ## level, ifcvf_vdpa_logtype, \
                "IFCVF %s(): " fmt "\n", __func__, ##args)
 
-#ifndef PAGE_SIZE
-#define PAGE_SIZE 4096
-#endif
-
 #define IFCVF_USED_RING_LEN(size) \
        ((size) * sizeof(struct vring_used_elem) + sizeof(uint16_t) * 3)
 
 #define IFCVF_VDPA_MODE                "vdpa"
 #define IFCVF_SW_FALLBACK_LM   "sw-live-migration"
 
+#define THREAD_NAME_LEN        16
+
 static const char * const ifcvf_valid_arguments[] = {
        IFCVF_VDPA_MODE,
        IFCVF_SW_FALLBACK_LM,
        NULL
 };
 
-static int ifcvf_vdpa_logtype;
-
 struct ifcvf_internal {
-       struct rte_vdpa_dev_addr dev_addr;
        struct rte_pci_device *pdev;
        struct ifcvf_hw hw;
+       int configured;
        int vfio_container_fd;
        int vfio_group_fd;
        int vfio_dev_fd;
        pthread_t tid;  /* thread for notify relay */
        int epfd;
        int vid;
-       int did;
+       struct rte_vdpa_device *vdev;
        uint16_t max_queues;
        uint64_t features;
        rte_atomic32_t started;
@@ -85,7 +84,7 @@ static pthread_mutex_t internal_list_lock = PTHREAD_MUTEX_INITIALIZER;
 static void update_used_ring(struct ifcvf_internal *internal, uint16_t qid);
 
 static struct internal_list *
-find_internal_resource_by_did(int did)
+find_internal_resource_by_vdev(struct rte_vdpa_device *vdev)
 {
        int found = 0;
        struct internal_list *list;
@@ -93,7 +92,7 @@ find_internal_resource_by_did(int did)
        pthread_mutex_lock(&internal_list_lock);
 
        TAILQ_FOREACH(list, &internal_list, next) {
-               if (did == list->internal->did) {
+               if (vdev == list->internal->vdev) {
                        found = 1;
                        break;
                }
@@ -116,7 +115,8 @@ find_internal_resource_by_dev(struct rte_pci_device *pdev)
        pthread_mutex_lock(&internal_list_lock);
 
        TAILQ_FOREACH(list, &internal_list, next) {
-               if (pdev == list->internal->pdev) {
+               if (!rte_pci_addr_cmp(&pdev->addr,
+                                       &list->internal->pdev->addr)) {
                        found = 1;
                        break;
                }
@@ -162,7 +162,7 @@ ifcvf_vfio_setup(struct ifcvf_internal *internal)
        if (rte_pci_map_device(dev))
                goto err;
 
-       internal->vfio_dev_fd = dev->intr_handle.vfio_dev_fd;
+       internal->vfio_dev_fd = rte_intr_dev_fd_get(dev->intr_handle);
 
        for (i = 0; i < RTE_MIN(PCI_MAX_RESOURCE, IFCVF_PCI_MAX_RESOURCE);
                        i++) {
@@ -182,7 +182,7 @@ err:
 }
 
 static int
-ifcvf_dma_map(struct ifcvf_internal *internal, int do_map)
+ifcvf_dma_map(struct ifcvf_internal *internal, bool do_map)
 {
        uint32_t i;
        int ret;
@@ -226,8 +226,7 @@ ifcvf_dma_map(struct ifcvf_internal *internal, int do_map)
        }
 
 exit:
-       if (mem)
-               free(mem);
+       free(mem);
        return ret;
 }
 
@@ -253,8 +252,7 @@ hva_to_gpa(int vid, uint64_t hva)
        }
 
 exit:
-       if (mem)
-               free(mem);
+       free(mem);
        return gpa;
 }
 
@@ -365,7 +363,8 @@ vdpa_enable_vfio_intr(struct ifcvf_internal *internal, bool m_rx)
        irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
        irq_set->start = 0;
        fd_ptr = (int *)&irq_set->data;
-       fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] = internal->pdev->intr_handle.fd;
+       fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] =
+               rte_intr_fd_get(internal->pdev->intr_handle);
 
        for (i = 0; i < nr_vring; i++)
                internal->intr_fd[i] = -1;
@@ -496,14 +495,17 @@ notify_relay(void *arg)
 static int
 setup_notify_relay(struct ifcvf_internal *internal)
 {
+       char name[THREAD_NAME_LEN]; /* name for the notify relay thread */
        int ret;
 
-       ret = pthread_create(&internal->tid, NULL, notify_relay,
-                       (void *)internal);
-       if (ret) {
+       snprintf(name, sizeof(name), "ifc-notify-%d", internal->vid);
+       ret = rte_ctrl_thread_create(&internal->tid, name, NULL, notify_relay,
+                                    (void *)internal);
+       if (ret != 0) {
                DRV_LOG(ERR, "failed to create notify relay pthread.");
                return -1;
        }
+       /* Thread created; its handle is stored in internal->tid. */
        return 0;
 }
 
@@ -535,11 +537,11 @@ update_datapath(struct ifcvf_internal *internal)
        if (!rte_atomic32_read(&internal->running) &&
            (rte_atomic32_read(&internal->started) &&
             rte_atomic32_read(&internal->dev_attached))) {
-               ret = ifcvf_dma_map(internal, 1);
+               ret = ifcvf_dma_map(internal, true);
                if (ret)
                        goto err;
 
-               ret = vdpa_enable_vfio_intr(internal, 0);
+               ret = vdpa_enable_vfio_intr(internal, false);
                if (ret)
                        goto err;
 
@@ -565,7 +567,7 @@ update_datapath(struct ifcvf_internal *internal)
                if (ret)
                        goto err;
 
-               ret = ifcvf_dma_map(internal, 0);
+               ret = ifcvf_dma_map(internal, false);
                if (ret)
                        goto err;
 
@@ -599,11 +601,11 @@ m_ifcvf_start(struct ifcvf_internal *internal)
        for (i = 0; i < nr_vring; i++) {
                rte_vhost_get_vhost_vring(vid, i, &vq);
 
-               size = RTE_ALIGN_CEIL(vring_size(vq.size, PAGE_SIZE),
-                               PAGE_SIZE);
-               vring_buf = rte_zmalloc("ifcvf", size, PAGE_SIZE);
+               size = RTE_ALIGN_CEIL(vring_size(vq.size, rte_mem_page_size()),
+                               rte_mem_page_size());
+               vring_buf = rte_zmalloc("ifcvf", size, rte_mem_page_size());
                vring_init(&internal->m_vring[i], vq.size, vring_buf,
-                               PAGE_SIZE);
+                               rte_mem_page_size());
 
                ret = rte_vfio_container_dma_map(internal->vfio_container_fd,
                        (uint64_t)(uintptr_t)vring_buf, m_vring_iova, size);
@@ -657,8 +659,7 @@ m_ifcvf_start(struct ifcvf_internal *internal)
 
 error:
        for (i = 0; i < nr_vring; i++)
-               if (internal->m_vring[i].desc)
-                       rte_free(internal->m_vring[i].desc);
+               rte_free(internal->m_vring[i].desc);
 
        return -1;
 }
@@ -685,8 +686,8 @@ m_ifcvf_stop(struct ifcvf_internal *internal)
                len = IFCVF_USED_RING_LEN(vq.size);
                rte_vhost_log_used_vring(vid, i, 0, len);
 
-               size = RTE_ALIGN_CEIL(vring_size(vq.size, PAGE_SIZE),
-                               PAGE_SIZE);
+               size = RTE_ALIGN_CEIL(vring_size(vq.size, rte_mem_page_size()),
+                               rte_mem_page_size());
                rte_vfio_container_dma_unmap(internal->vfio_container_fd,
                        (uint64_t)(uintptr_t)internal->m_vring[i].desc,
                        m_vring_iova, size);
@@ -799,14 +800,17 @@ vring_relay(void *arg)
 static int
 setup_vring_relay(struct ifcvf_internal *internal)
 {
+       char name[THREAD_NAME_LEN]; /* name for the vring relay thread */
        int ret;
 
-       ret = pthread_create(&internal->tid, NULL, vring_relay,
-                       (void *)internal);
-       if (ret) {
+       snprintf(name, sizeof(name), "ifc-vring-%d", internal->vid);
+       ret = rte_ctrl_thread_create(&internal->tid, name, NULL, vring_relay,
+                                    (void *)internal);
+       if (ret != 0) {
                DRV_LOG(ERR, "failed to create ring relay pthread.");
                return -1;
        }
+       /* Thread created; its handle is stored in internal->tid. */
        return 0;
 }
 
@@ -839,12 +843,12 @@ ifcvf_sw_fallback_switchover(struct ifcvf_internal *internal)
        vdpa_ifcvf_stop(internal);
        vdpa_disable_vfio_intr(internal);
 
-       ret = rte_vhost_host_notifier_ctrl(vid, false);
+       ret = rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, false);
        if (ret && ret != -ENOTSUP)
                goto error;
 
        /* set up interrupt for interrupt relay */
-       ret = vdpa_enable_vfio_intr(internal, 1);
+       ret = vdpa_enable_vfio_intr(internal, true);
        if (ret)
                goto unmap;
 
@@ -858,7 +862,7 @@ ifcvf_sw_fallback_switchover(struct ifcvf_internal *internal)
        if (ret)
                goto stop_vf;
 
-       rte_vhost_host_notifier_ctrl(vid, true);
+       rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, true);
 
        internal->sw_fallback_running = true;
 
@@ -869,7 +873,7 @@ stop_vf:
 unset_intr:
        vdpa_disable_vfio_intr(internal);
 unmap:
-       ifcvf_dma_map(internal, 0);
+       ifcvf_dma_map(internal, false);
 error:
        return -1;
 }
@@ -877,14 +881,14 @@ error:
 static int
 ifcvf_dev_config(int vid)
 {
-       int did;
+       struct rte_vdpa_device *vdev;
        struct internal_list *list;
        struct ifcvf_internal *internal;
 
-       did = rte_vhost_get_vdpa_device_id(vid);
-       list = find_internal_resource_by_did(did);
+       vdev = rte_vhost_get_vdpa_device(vid);
+       list = find_internal_resource_by_vdev(vdev);
        if (list == NULL) {
-               DRV_LOG(ERR, "Invalid device id: %d", did);
+               DRV_LOG(ERR, "Invalid vDPA device: %p", vdev);
                return -1;
        }
 
@@ -893,23 +897,25 @@ ifcvf_dev_config(int vid)
        rte_atomic32_set(&internal->dev_attached, 1);
        update_datapath(internal);
 
-       if (rte_vhost_host_notifier_ctrl(vid, true) != 0)
-               DRV_LOG(NOTICE, "vDPA (%d): software relay is used.", did);
+       if (rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, true) != 0)
+               DRV_LOG(NOTICE, "vDPA (%s): software relay is used.",
+                               vdev->device->name);
 
+       internal->configured = 1;
        return 0;
 }
 
 static int
 ifcvf_dev_close(int vid)
 {
-       int did;
+       struct rte_vdpa_device *vdev;
        struct internal_list *list;
        struct ifcvf_internal *internal;
 
-       did = rte_vhost_get_vdpa_device_id(vid);
-       list = find_internal_resource_by_did(did);
+       vdev = rte_vhost_get_vdpa_device(vid);
+       list = find_internal_resource_by_vdev(vdev);
        if (list == NULL) {
-               DRV_LOG(ERR, "Invalid device id: %d", did);
+               DRV_LOG(ERR, "Invalid vDPA device: %p", vdev);
                return -1;
        }
 
@@ -926,7 +932,7 @@ ifcvf_dev_close(int vid)
                vdpa_disable_vfio_intr(internal);
 
                /* unset DMA map for guest memory */
-               ifcvf_dma_map(internal, 0);
+               ifcvf_dma_map(internal, false);
 
                internal->sw_fallback_running = false;
        } else {
@@ -934,6 +940,7 @@ ifcvf_dev_close(int vid)
                update_datapath(internal);
        }
 
+       internal->configured = 0;
        return 0;
 }
 
@@ -941,15 +948,15 @@ static int
 ifcvf_set_features(int vid)
 {
        uint64_t features = 0;
-       int did;
+       struct rte_vdpa_device *vdev;
        struct internal_list *list;
        struct ifcvf_internal *internal;
        uint64_t log_base = 0, log_size = 0;
 
-       did = rte_vhost_get_vdpa_device_id(vid);
-       list = find_internal_resource_by_did(did);
+       vdev = rte_vhost_get_vdpa_device(vid);
+       list = find_internal_resource_by_vdev(vdev);
        if (list == NULL) {
-               DRV_LOG(ERR, "Invalid device id: %d", did);
+               DRV_LOG(ERR, "Invalid vDPA device: %p", vdev);
                return -1;
        }
 
@@ -974,13 +981,13 @@ ifcvf_set_features(int vid)
 static int
 ifcvf_get_vfio_group_fd(int vid)
 {
-       int did;
+       struct rte_vdpa_device *vdev;
        struct internal_list *list;
 
-       did = rte_vhost_get_vdpa_device_id(vid);
-       list = find_internal_resource_by_did(did);
+       vdev = rte_vhost_get_vdpa_device(vid);
+       list = find_internal_resource_by_vdev(vdev);
        if (list == NULL) {
-               DRV_LOG(ERR, "Invalid device id: %d", did);
+               DRV_LOG(ERR, "Invalid vDPA device: %p", vdev);
                return -1;
        }
 
@@ -990,13 +997,13 @@ ifcvf_get_vfio_group_fd(int vid)
 static int
 ifcvf_get_vfio_device_fd(int vid)
 {
-       int did;
+       struct rte_vdpa_device *vdev;
        struct internal_list *list;
 
-       did = rte_vhost_get_vdpa_device_id(vid);
-       list = find_internal_resource_by_did(did);
+       vdev = rte_vhost_get_vdpa_device(vid);
+       list = find_internal_resource_by_vdev(vdev);
        if (list == NULL) {
-               DRV_LOG(ERR, "Invalid device id: %d", did);
+               DRV_LOG(ERR, "Invalid vDPA device: %p", vdev);
                return -1;
        }
 
@@ -1006,16 +1013,16 @@ ifcvf_get_vfio_device_fd(int vid)
 static int
 ifcvf_get_notify_area(int vid, int qid, uint64_t *offset, uint64_t *size)
 {
-       int did;
+       struct rte_vdpa_device *vdev;
        struct internal_list *list;
        struct ifcvf_internal *internal;
        struct vfio_region_info reg = { .argsz = sizeof(reg) };
        int ret;
 
-       did = rte_vhost_get_vdpa_device_id(vid);
-       list = find_internal_resource_by_did(did);
+       vdev = rte_vhost_get_vdpa_device(vid);
+       list = find_internal_resource_by_vdev(vdev);
        if (list == NULL) {
-               DRV_LOG(ERR, "Invalid device id: %d", did);
+               DRV_LOG(ERR, "Invalid vDPA device: %p", vdev);
                return -1;
        }
 
@@ -1036,13 +1043,13 @@ ifcvf_get_notify_area(int vid, int qid, uint64_t *offset, uint64_t *size)
 }
 
 static int
-ifcvf_get_queue_num(int did, uint32_t *queue_num)
+ifcvf_get_queue_num(struct rte_vdpa_device *vdev, uint32_t *queue_num)
 {
        struct internal_list *list;
 
-       list = find_internal_resource_by_did(did);
+       list = find_internal_resource_by_vdev(vdev);
        if (list == NULL) {
-               DRV_LOG(ERR, "Invalid device id: %d", did);
+               DRV_LOG(ERR, "Invalid vDPA device: %p", vdev);
                return -1;
        }
 
@@ -1052,13 +1059,13 @@ ifcvf_get_queue_num(int did, uint32_t *queue_num)
 }
 
 static int
-ifcvf_get_vdpa_features(int did, uint64_t *features)
+ifcvf_get_vdpa_features(struct rte_vdpa_device *vdev, uint64_t *features)
 {
        struct internal_list *list;
 
-       list = find_internal_resource_by_did(did);
+       list = find_internal_resource_by_vdev(vdev);
        if (list == NULL) {
-               DRV_LOG(ERR, "Invalid device id: %d", did);
+               DRV_LOG(ERR, "Invalid vDPA device: %p", vdev);
                return -1;
        }
 
@@ -1072,21 +1079,72 @@ ifcvf_get_vdpa_features(int did, uint64_t *features)
                 1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ | \
                 1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD | \
                 1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER | \
-                1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD)
+                1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD | \
+                1ULL << VHOST_USER_PROTOCOL_F_STATUS)
 static int
-ifcvf_get_protocol_features(int did __rte_unused, uint64_t *features)
+ifcvf_get_protocol_features(struct rte_vdpa_device *vdev, uint64_t *features)
 {
+       RTE_SET_USED(vdev);
+       /* The same fixed protocol feature mask is reported for any vdev. */
        *features = VDPA_SUPPORTED_PROTOCOL_FEATURES;
        return 0;
 }
 
+static int /* .set_vring_state op: enable (state != 0) or disable one vring */
+ifcvf_set_vring_state(int vid, int vring, int state)
+{
+       struct rte_vdpa_device *vdev;
+       struct internal_list *list;
+       struct ifcvf_internal *internal;
+       struct ifcvf_hw *hw;
+       struct ifcvf_pci_common_cfg *cfg;
+       int ret = 0;
+       /* Look up the driver state for the vDPA device returned for vid. */
+       vdev = rte_vhost_get_vdpa_device(vid);
+       list = find_internal_resource_by_vdev(vdev);
+       if (list == NULL) {
+               DRV_LOG(ERR, "Invalid vDPA device: %p", vdev);
+               return -1;
+       }
+       /* Valid vring indexes are 0 .. max_queues * 2 - 1. */
+       internal = list->internal;
+       if (vring < 0 || vring >= internal->max_queues * 2) {
+               DRV_LOG(ERR, "Vring index %d not correct", vring);
+               return -1;
+       }
+       /* Until configured, only record the state; no register access. */
+       hw = &internal->hw;
+       if (!internal->configured)
+               goto exit;
+       /* Select the queue, then write its enable register. */
+       cfg = hw->common_cfg;
+       IFCVF_WRITE_REG16(vring, &cfg->queue_select);
+       IFCVF_WRITE_REG16(!!state, &cfg->queue_enable);
+       /* Enabled -> disabled; on failure 'enable' is left unchanged. */
+       if (!state && hw->vring[vring].enable) {
+               ret = vdpa_disable_vfio_intr(internal);
+               if (ret)
+                       return ret;
+       }
+       /* Disabled -> enabled; on failure 'enable' is left unchanged. */
+       if (state && !hw->vring[vring].enable) {
+               ret = vdpa_enable_vfio_intr(internal, false);
+               if (ret)
+                       return ret;
+       }
+       /* Cache the requested state, normalized to 0/1. */
+exit:
+       hw->vring[vring].enable = !!state;
+       return 0;
+}
+
 static struct rte_vdpa_dev_ops ifcvf_ops = {
        .get_queue_num = ifcvf_get_queue_num,
        .get_features = ifcvf_get_vdpa_features,
        .get_protocol_features = ifcvf_get_protocol_features,
        .dev_conf = ifcvf_dev_config,
        .dev_close = ifcvf_dev_close,
-       .set_vring_state = NULL,
+       .set_vring_state = ifcvf_set_vring_state,
        .set_features = ifcvf_set_features,
        .migration_done = NULL,
        .get_vfio_group_fd = ifcvf_get_vfio_group_fd,
@@ -1166,6 +1224,7 @@ ifcvf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
                goto error;
        }
 
+       internal->configured = 0;
        internal->max_queues = IFCVF_MAX_QUEUES;
        features = ifcvf_get_features(&internal->hw);
        internal->features = (features &
@@ -1176,8 +1235,6 @@ ifcvf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
                (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
                (1ULL << VHOST_F_LOG_ALL);
 
-       internal->dev_addr.pci_addr = pci_dev->addr;
-       internal->dev_addr.type = PCI_ADDR;
        list->internal = internal;
 
        if (rte_kvargs_count(kvlist, IFCVF_SW_FALLBACK_LM)) {
@@ -1188,9 +1245,8 @@ ifcvf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
        }
        internal->sw_lm = sw_fallback_lm;
 
-       internal->did = rte_vdpa_register_device(&internal->dev_addr,
-                               &ifcvf_ops);
-       if (internal->did < 0) {
+       internal->vdev = rte_vdpa_register_device(&pci_dev->device, &ifcvf_ops);
+       if (internal->vdev == NULL) {
                DRV_LOG(ERR, "failed to register device %s", pci_dev->name);
                goto error;
        }
@@ -1233,7 +1289,7 @@ ifcvf_pci_remove(struct rte_pci_device *pci_dev)
 
        rte_pci_unmap_device(internal->pdev);
        rte_vfio_container_destroy(internal->vfio_container_fd);
-       rte_vdpa_unregister_device(internal->did);
+       rte_vdpa_unregister_device(internal->vdev);
 
        pthread_mutex_lock(&internal_list_lock);
        TAILQ_REMOVE(&internal_list, list, next);
@@ -1271,10 +1327,3 @@ static struct rte_pci_driver rte_ifcvf_vdpa = {
 RTE_PMD_REGISTER_PCI(net_ifcvf, rte_ifcvf_vdpa);
 RTE_PMD_REGISTER_PCI_TABLE(net_ifcvf, pci_id_ifcvf_map);
 RTE_PMD_REGISTER_KMOD_DEP(net_ifcvf, "* vfio-pci");
-
-RTE_INIT(ifcvf_vdpa_init_log)
-{
-       ifcvf_vdpa_logtype = rte_log_register("pmd.net.ifcvf_vdpa");
-       if (ifcvf_vdpa_logtype >= 0)
-               rte_log_set_level(ifcvf_vdpa_logtype, RTE_LOG_NOTICE);
-}