vhost: remove vDPA device count API
[dpdk.git] / lib / librte_vhost / vdpa.c
index 77d92f9..e41c73e 100644 (file)
  */
 
 #include <stdbool.h>
+#include <sys/queue.h>
 
+#include <rte_class.h>
 #include <rte_malloc.h>
+#include <rte_spinlock.h>
+#include <rte_tailq.h>
+
 #include "rte_vdpa.h"
 #include "vhost.h"
 
-static struct rte_vdpa_device *vdpa_devices[MAX_VHOST_DEVICE];
-static uint32_t vdpa_device_num;
+/** Doubly linked list of vDPA devices. */
+TAILQ_HEAD(vdpa_device_list, rte_vdpa_device);
 
-static bool
-is_same_vdpa_device(struct rte_vdpa_dev_addr *a,
-               struct rte_vdpa_dev_addr *b)
-{
-       bool ret = true;
-
-       if (a->type != b->type)
-               return false;
-
-       switch (a->type) {
-       case PCI_ADDR:
-               if (a->pci_addr.domain != b->pci_addr.domain ||
-                               a->pci_addr.bus != b->pci_addr.bus ||
-                               a->pci_addr.devid != b->pci_addr.devid ||
-                               a->pci_addr.function != b->pci_addr.function)
-                       ret = false;
-               break;
-       default:
-               break;
-       }
+/*
+ * List of registered vDPA devices. The register, unregister and lookup
+ * helpers visible in this file only touch it while holding
+ * vdpa_device_list_lock.
+ */
+static struct vdpa_device_list vdpa_device_list =
+               TAILQ_HEAD_INITIALIZER(vdpa_device_list);
+/* Spinlock taken around every walk, insertion and removal on that list. */
+static rte_spinlock_t vdpa_device_list_lock = RTE_SPINLOCK_INITIALIZER;
 
-       return ret;
-}
 
-int
-rte_vdpa_register_device(struct rte_vdpa_dev_addr *addr,
-               struct rte_vdpa_dev_ops *ops)
+/*
+ * Look up a registered vDPA device by the name of its underlying rte_device.
+ *
+ * Walks vdpa_device_list and compares dev->device->name with name using
+ * strncmp() bounded by RTE_DEV_NAME_MAX_LEN; the first match wins.
+ *
+ * Unsafe, needs to be called with vdpa_device_list_lock held.
+ *
+ * Returns the matching device, or NULL when name is NULL or nothing matches.
+ */
+static struct rte_vdpa_device *
+__vdpa_find_device_by_name(const char *name)
 {
-       struct rte_vdpa_device *dev;
-       char device_name[MAX_VDPA_NAME_LEN];
-       int i;
-
-       if (vdpa_device_num >= MAX_VHOST_DEVICE)
-               return -1;
+       struct rte_vdpa_device *dev, *ret = NULL;
 
-       for (i = 0; i < MAX_VHOST_DEVICE; i++) {
-               dev = vdpa_devices[i];
-               if (dev && is_same_vdpa_device(&dev->addr, addr))
-                       return -1;
-       }
+       if (name == NULL)
+               return NULL;
 
-       for (i = 0; i < MAX_VHOST_DEVICE; i++) {
-               if (vdpa_devices[i] == NULL)
+       TAILQ_FOREACH(dev, &vdpa_device_list, next) {
+               if (!strncmp(dev->device->name, name, RTE_DEV_NAME_MAX_LEN)) {
+                       ret = dev;
                        break;
+               }
        }
 
-       if (i == MAX_VHOST_DEVICE)
-               return -1;
+       return ret;
+}
 
-       sprintf(device_name, "vdpa-dev-%d", i);
-       dev = rte_zmalloc(device_name, sizeof(struct rte_vdpa_device),
-                       RTE_CACHE_LINE_SIZE);
-       if (!dev)
-               return -1;
+/**
+ * Locked wrapper around __vdpa_find_device_by_name().
+ *
+ * Takes vdpa_device_list_lock for the duration of the lookup only; the lock
+ * is released before returning, so the returned pointer is not protected
+ * against a later rte_vdpa_unregister_device() freeing the device.
+ *
+ * @param name
+ *  Name to search for; a NULL name yields NULL.
+ * @return
+ *  The matching vDPA device, or NULL if none is registered under that name.
+ */
+struct rte_vdpa_device *
+rte_vdpa_find_device_by_name(const char *name)
+{
+       struct rte_vdpa_device *dev;
 
-       memcpy(&dev->addr, addr, sizeof(struct rte_vdpa_dev_addr));
-       dev->ops = ops;
-       vdpa_devices[i] = dev;
-       vdpa_device_num++;
+       rte_spinlock_lock(&vdpa_device_list_lock);
+       dev = __vdpa_find_device_by_name(name);
+       rte_spinlock_unlock(&vdpa_device_list_lock);
 
-       return i;
+       return dev;
 }
 
-int
-rte_vdpa_unregister_device(int did)
+/**
+ * Return the generic rte_device a vDPA device was registered with.
+ *
+ * @param vdpa_dev
+ *  vDPA device handle; may be NULL.
+ * @return
+ *  vdpa_dev->device, or NULL when vdpa_dev is NULL.
+ */
+struct rte_device *
+rte_vdpa_get_rte_device(struct rte_vdpa_device *vdpa_dev)
 {
-       if (did < 0 || did >= MAX_VHOST_DEVICE || vdpa_devices[did] == NULL)
-               return -1;
-
-       rte_free(vdpa_devices[did]);
-       vdpa_devices[did] = NULL;
-       vdpa_device_num--;
+       if (vdpa_dev == NULL)
+               return NULL;
 
-       return did;
+       return vdpa_dev->device;
 }
 
-int
-rte_vdpa_find_device_id(struct rte_vdpa_dev_addr *addr)
+/**
+ * Register a vDPA device backed by rte_dev with the given driver ops.
+ *
+ * Registration is refused when rte_dev or ops is NULL, when a device with
+ * the same rte_dev->name is already on the list, or when allocation fails.
+ * The duplicate check and the insertion both run under
+ * vdpa_device_list_lock, so they form a single critical section.
+ *
+ * @param rte_dev
+ *  Underlying generic device; stored by pointer, not copied.
+ * @param ops
+ *  Driver callbacks; stored by pointer, not copied.
+ * @return
+ *  The newly allocated vDPA device on success, NULL on any failure.
+ */
+struct rte_vdpa_device *
+rte_vdpa_register_device(struct rte_device *rte_dev,
+               struct rte_vdpa_dev_ops *ops)
 {
        struct rte_vdpa_device *dev;
-       int i;
 
-       for (i = 0; i < MAX_VHOST_DEVICE; ++i) {
-               dev = vdpa_devices[i];
-               if (dev && is_same_vdpa_device(&dev->addr, addr))
-                       return i;
+       /* rte_dev->name is dereferenced below, so reject NULL up front */
+       if (rte_dev == NULL || ops == NULL)
+               return NULL;
+
+       rte_spinlock_lock(&vdpa_device_list_lock);
+       /* Check the device hasn't been registered already */
+       dev = __vdpa_find_device_by_name(rte_dev->name);
+       if (dev) {
+               dev = NULL;
+               goto out_unlock;
        }
 
-       return -1;
-}
+       dev = rte_zmalloc(NULL, sizeof(*dev), 0);
+       if (!dev)
+               goto out_unlock;
 
-struct rte_vdpa_device *
-rte_vdpa_get_device(int did)
-{
-       if (did < 0 || did >= MAX_VHOST_DEVICE)
-               return NULL;
+       dev->device = rte_dev;
+       dev->ops = ops;
+       TAILQ_INSERT_TAIL(&vdpa_device_list, dev, next);
+out_unlock:
+       rte_spinlock_unlock(&vdpa_device_list_lock);
 
-       return vdpa_devices[did];
+       return dev;
 }
 
+/**
+ * Remove a vDPA device from the registry and free it.
+ *
+ * The list is walked under vdpa_device_list_lock and dev is only unlinked
+ * and passed to rte_free() if it is actually found there, so a pointer that
+ * was never registered (including NULL) is rejected rather than freed.
+ *
+ * @param dev
+ *  Device previously returned by rte_vdpa_register_device().
+ * @return
+ *  0 when the device was found and released, -1 otherwise.
+ */
 int
-rte_vdpa_get_device_num(void)
-{
-       return vdpa_device_num;
-}
-
-static bool
-invalid_desc_check(struct virtio_net *dev, struct vhost_virtqueue *vq,
-               uint64_t desc_iova, uint64_t desc_len, uint8_t perm)
+rte_vdpa_unregister_device(struct rte_vdpa_device *dev)
 {
-       uint64_t desc_addr, desc_chunck_len;
-
-       while (desc_len) {
-               desc_chunck_len = desc_len;
-               desc_addr = vhost_iova_to_vva(dev, vq,
-                               desc_iova,
-                               &desc_chunck_len,
-                               perm);
+       struct rte_vdpa_device *cur_dev, *tmp_dev;
+       int ret = -1;
 
-               if (!desc_addr)
-                       return true;
+       rte_spinlock_lock(&vdpa_device_list_lock);
+       TAILQ_FOREACH_SAFE(cur_dev, &vdpa_device_list, next, tmp_dev) {
+               if (dev != cur_dev)
+                       continue;
 
-               desc_len -= desc_chunck_len;
-               desc_iova += desc_chunck_len;
+               TAILQ_REMOVE(&vdpa_device_list, dev, next);
+               rte_free(dev);
+               ret = 0;
+               break;
        }
+       rte_spinlock_unlock(&vdpa_device_list_lock);
 
-       return false;
+       return ret;
 }
 
-int __rte_experimental
-rte_vdpa_relay_vring_avail(int vid, uint16_t qid, void *vring_m)
+int
+rte_vdpa_relay_vring_used(int vid, uint16_t qid, void *vring_m)
 {
        struct virtio_net *dev = get_device(vid);
        uint16_t idx, idx_m, desc_id;
-       struct vring_desc desc;
        struct vhost_virtqueue *vq;
+       struct vring_desc desc;
        struct vring_desc *desc_ring;
        struct vring_desc *idesc = NULL;
        struct vring *s_vring;
        uint64_t dlen;
+       uint32_t nr_descs;
        int ret;
-       uint8_t perm;
 
        if (!dev || !vring_m)
                return -1;
@@ -171,18 +143,28 @@ rte_vdpa_relay_vring_avail(int vid, uint16_t qid, void *vring_m)
 
        s_vring = (struct vring *)vring_m;
        vq = dev->virtqueue[qid];
-       idx = vq->avail->idx;
-       idx_m = s_vring->avail->idx;
-       ret = (uint16_t)(idx - idx_m);
-
-       while (idx_m != idx) {
-               /* avail entry copy */
-               desc_id = vq->avail->ring[idx_m & (vq->size - 1)];
-               s_vring->avail->ring[idx_m & (vq->size - 1)] = desc_id;
+       idx = vq->used->idx;
+       idx_m = s_vring->used->idx;
+       ret = (uint16_t)(idx_m - idx);
+
+       while (idx != idx_m) {
+               /* copy used entry, used ring logging is not covered here */
+               vq->used->ring[idx & (vq->size - 1)] =
+                       s_vring->used->ring[idx & (vq->size - 1)];
+
+               desc_id = vq->used->ring[idx & (vq->size - 1)].id;
                desc_ring = vq->desc;
+               nr_descs = vq->size;
+
+               if (unlikely(desc_id >= vq->size))
+                       return -1;
 
                if (vq->desc[desc_id].flags & VRING_DESC_F_INDIRECT) {
                        dlen = vq->desc[desc_id].len;
+                       nr_descs = dlen / sizeof(struct vring_desc);
+                       if (unlikely(nr_descs > vq->size))
+                               return -1;
+
                        desc_ring = (struct vring_desc *)(uintptr_t)
                                vhost_iova_to_vva(dev, vq,
                                                vq->desc[desc_id].addr, &dlen,
@@ -191,7 +173,7 @@ rte_vdpa_relay_vring_avail(int vid, uint16_t qid, void *vring_m)
                                return -1;
 
                        if (unlikely(dlen < vq->desc[desc_id].len)) {
-                               idesc = alloc_copy_ind_table(dev, vq,
+                               idesc = vhost_alloc_copy_ind_table(dev, vq,
                                                vq->desc[desc_id].addr,
                                                vq->desc[desc_id].len);
                                if (unlikely(!idesc))
@@ -203,17 +185,16 @@ rte_vdpa_relay_vring_avail(int vid, uint16_t qid, void *vring_m)
                        desc_id = 0;
                }
 
-               /* check if the buf addr is within the guest memory */
+               /* dirty page logging for DMA writeable buffer */
                do {
+                       if (unlikely(desc_id >= vq->size))
+                               goto fail;
+                       if (unlikely(nr_descs-- == 0))
+                               goto fail;
                        desc = desc_ring[desc_id];
-                       perm = desc.flags & VRING_DESC_F_WRITE ?
-                               VHOST_ACCESS_WO : VHOST_ACCESS_RO;
-                       if (invalid_desc_check(dev, vq, desc.addr, desc.len,
-                                               perm)) {
-                               if (unlikely(idesc))
-                                       free_ind_table(idesc);
-                               return -1;
-                       }
+                       if (desc.flags & VRING_DESC_F_WRITE)
+                               vhost_log_write_iova(dev, vq, desc.addr,
+                                                    desc.len);
                        desc_id = desc.next;
                } while (desc.flags & VRING_DESC_F_NEXT);
 
@@ -222,97 +203,140 @@ rte_vdpa_relay_vring_avail(int vid, uint16_t qid, void *vring_m)
                        idesc = NULL;
                }
 
-               idx_m++;
+               idx++;
        }
 
        rte_smp_wmb();
-       s_vring->avail->idx = idx;
+       vq->used->idx = idx_m;
 
        if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
-               vhost_avail_event(vq) = idx;
+               vring_used_event(s_vring) = idx_m;
 
        return ret;
+
+fail:
+       if (unlikely(idesc))
+               free_ind_table(idesc);
+       return -1;
 }
 
-int __rte_experimental
-rte_vdpa_relay_vring_used(int vid, uint16_t qid, void *vring_m)
+/**
+ * Forward a queue-number query to the driver's get_queue_num callback.
+ *
+ * @param dev
+ *  vDPA device handle.
+ * @param queue_num
+ *  Passed through unchanged to the callback; not validated here.
+ * @return
+ *  -1 when dev, dev->ops or the callback is NULL, otherwise whatever the
+ *  callback returns.
+ */
+int
+rte_vdpa_get_queue_num(struct rte_vdpa_device *dev, uint32_t *queue_num)
 {
-       struct virtio_net *dev = get_device(vid);
-       uint16_t idx, idx_m, desc_id;
-       struct vhost_virtqueue *vq;
-       struct vring_desc desc;
-       struct vring_desc *desc_ring;
-       struct vring_desc *idesc = NULL;
-       struct vring *s_vring;
-       uint64_t dlen;
-       int ret;
-
-       if (!dev || !vring_m)
+       if (dev == NULL || dev->ops == NULL || dev->ops->get_queue_num == NULL)
                return -1;
 
-       if (qid >= dev->nr_vring)
+       return dev->ops->get_queue_num(dev, queue_num);
+}
+
+/**
+ * Forward a feature query to the driver's get_features callback.
+ *
+ * @param dev
+ *  vDPA device handle.
+ * @param features
+ *  Passed through unchanged to the callback; not validated here.
+ * @return
+ *  -1 when dev, dev->ops or the callback is NULL, otherwise whatever the
+ *  callback returns.
+ */
+int
+rte_vdpa_get_features(struct rte_vdpa_device *dev, uint64_t *features)
+{
+       if (dev == NULL || dev->ops == NULL || dev->ops->get_features == NULL)
                return -1;
 
-       if (vq_is_packed(dev))
+       return dev->ops->get_features(dev, features);
+}
+
+/**
+ * Forward a protocol-feature query to the driver's get_protocol_features
+ * callback.
+ *
+ * @param dev
+ *  vDPA device handle.
+ * @param features
+ *  Passed through unchanged to the callback; not validated here.
+ * @return
+ *  -1 when dev, dev->ops or the callback is NULL, otherwise whatever the
+ *  callback returns.
+ */
+int
+rte_vdpa_get_protocol_features(struct rte_vdpa_device *dev, uint64_t *features)
+{
+       if (dev == NULL || dev->ops == NULL ||
+                       dev->ops->get_protocol_features == NULL)
                return -1;
 
-       s_vring = (struct vring *)vring_m;
-       vq = dev->virtqueue[qid];
-       idx = vq->used->idx;
-       idx_m = s_vring->used->idx;
-       ret = (uint16_t)(idx_m - idx);
+       return dev->ops->get_protocol_features(dev, features);
+}
 
-       while (idx != idx_m) {
-               /* copy used entry, used ring logging is not covered here */
-               vq->used->ring[idx & (vq->size - 1)] =
-                       s_vring->used->ring[idx & (vq->size - 1)];
+/**
+ * Forward a statistics-name query to the driver's get_stats_names callback.
+ *
+ * Only dev is validated here; stats_names and size go to the driver as is.
+ * Unlike the feature/queue getters in this file, dev->ops itself is not
+ * checked for NULL before being dereferenced.
+ *
+ * @return
+ *  -EINVAL when dev is NULL; otherwise the callback's return value.
+ *  NOTE(review): RTE_FUNC_PTR_OR_ERR_RET is defined outside this file and
+ *  presumably returns -ENOTSUP when the callback is NULL - confirm.
+ */
+int
+rte_vdpa_get_stats_names(struct rte_vdpa_device *dev,
+               struct rte_vdpa_stat_name *stats_names,
+               unsigned int size)
+{
+       if (!dev)
+               return -EINVAL;
 
-               desc_id = vq->used->ring[idx & (vq->size - 1)].id;
-               desc_ring = vq->desc;
+       RTE_FUNC_PTR_OR_ERR_RET(dev->ops->get_stats_names, -ENOTSUP);
 
-               if (vq->desc[desc_id].flags & VRING_DESC_F_INDIRECT) {
-                       dlen = vq->desc[desc_id].len;
-                       desc_ring = (struct vring_desc *)(uintptr_t)
-                               vhost_iova_to_vva(dev, vq,
-                                               vq->desc[desc_id].addr, &dlen,
-                                               VHOST_ACCESS_RO);
-                       if (unlikely(!desc_ring))
-                               return -1;
+       return dev->ops->get_stats_names(dev, stats_names, size);
+}
 
-                       if (unlikely(dlen < vq->desc[desc_id].len)) {
-                               idesc = alloc_copy_ind_table(dev, vq,
-                                               vq->desc[desc_id].addr,
-                                               vq->desc[desc_id].len);
-                               if (unlikely(!idesc))
-                                       return -1;
+/**
+ * Forward a per-queue statistics query to the driver's get_stats callback.
+ *
+ * qid is passed through without any range check at this level.
+ *
+ * @return
+ *  -EINVAL when dev or stats is NULL or n is zero; otherwise the callback's
+ *  return value.
+ *  NOTE(review): RTE_FUNC_PTR_OR_ERR_RET is defined outside this file and
+ *  presumably returns -ENOTSUP when the callback is NULL - confirm.
+ */
+int
+rte_vdpa_get_stats(struct rte_vdpa_device *dev, uint16_t qid,
+               struct rte_vdpa_stat *stats, unsigned int n)
+{
+       if (!dev || !stats || !n)
+               return -EINVAL;
 
-                               desc_ring = idesc;
-                       }
+       RTE_FUNC_PTR_OR_ERR_RET(dev->ops->get_stats, -ENOTSUP);
 
-                       desc_id = 0;
-               }
+       return dev->ops->get_stats(dev, qid, stats, n);
+}
 
-               /* dirty page logging for DMA writeable buffer */
-               do {
-                       desc = desc_ring[desc_id];
-                       if (desc.flags & VRING_DESC_F_WRITE)
-                               vhost_log_write(dev, desc.addr, desc.len);
-                       desc_id = desc.next;
-               } while (desc.flags & VRING_DESC_F_NEXT);
+/**
+ * Forward a per-queue statistics reset to the driver's reset_stats callback.
+ *
+ * qid is passed through without any range check at this level.
+ *
+ * @return
+ *  -EINVAL when dev is NULL; otherwise the callback's return value.
+ *  NOTE(review): RTE_FUNC_PTR_OR_ERR_RET is defined outside this file and
+ *  presumably returns -ENOTSUP when the callback is NULL - confirm.
+ */
+int
+rte_vdpa_reset_stats(struct rte_vdpa_device *dev, uint16_t qid)
+{
+       if (!dev)
+               return -EINVAL;
 
-               if (unlikely(idesc)) {
-                       free_ind_table(idesc);
-                       idesc = NULL;
-               }
+       RTE_FUNC_PTR_OR_ERR_RET(dev->ops->reset_stats, -ENOTSUP);
 
-               idx++;
+       return dev->ops->reset_stats(dev, qid);
+}
+
+/*
+ * Comparison callback: a vDPA device matches when its backing device
+ * pointer is identical to rte_dev (pointer equality, not name comparison).
+ * Returns 0 on match and -1 otherwise.
+ */
+static int
+vdpa_dev_match(struct rte_vdpa_device *dev,
+             const struct rte_device *rte_dev)
+{
+       if (dev->device == rte_dev)
+               return 0;
+
+       return -1;
+}
+
+/*
+ * Generic rte_vdpa_dev comparison function.
+ * vdpa_find_device() treats a return value of 0 as "match" and any other
+ * value as "keep searching".
+ */
+typedef int (*rte_vdpa_cmp_t)(struct rte_vdpa_device *,
+               const struct rte_device *rte_dev);
+
+/*
+ * Find the next registered vDPA device accepted by cmp.
+ *
+ * The search begins at the head of vdpa_device_list when start is NULL,
+ * otherwise at the element following start (start itself is never
+ * returned). The walk is done under vdpa_device_list_lock; the lock is
+ * dropped before returning.
+ *
+ * Returns the first device for which cmp(dev, rte_dev) is 0, or NULL when
+ * the end of the list is reached.
+ */
+static struct rte_vdpa_device *
+vdpa_find_device(const struct rte_vdpa_device *start, rte_vdpa_cmp_t cmp,
+               struct rte_device *rte_dev)
+{
+       struct rte_vdpa_device *dev;
+
+       rte_spinlock_lock(&vdpa_device_list_lock);
+       if (start == NULL)
+               dev = TAILQ_FIRST(&vdpa_device_list);
+       else
+               dev = TAILQ_NEXT(start, next);
+
+       while (dev != NULL) {
+               if (cmp(dev, rte_dev) == 0)
+                       break;
+
+               dev = TAILQ_NEXT(dev, next);
        }
+       rte_spinlock_unlock(&vdpa_device_list_lock);
 
-       rte_smp_wmb();
-       vq->used->idx = idx_m;
+       return dev;
+}
 
-       if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
-               vring_used_event(s_vring) = idx_m;
+/*
+ * Iteration callback installed as rte_class_vdpa.dev_iterate.
+ *
+ * Returns the next vDPA device after start (or the first one when start is
+ * NULL) whose backing device pointer equals it->device, or NULL when there
+ * is none. The filter string str is ignored.
+ */
+static void *
+vdpa_dev_iterate(const void *start,
+               const char *str,
+               const struct rte_dev_iterator *it)
+{
+       struct rte_vdpa_device *vdpa_dev = NULL;
 
-       return ret;
+       RTE_SET_USED(str);
+
+       vdpa_dev = vdpa_find_device(start, vdpa_dev_match, it->device);
+
+       return vdpa_dev;
 }
+
+/* vDPA device class descriptor; only the dev_iterate hook is populated. */
+static struct rte_class rte_class_vdpa = {
+       .dev_iterate = vdpa_dev_iterate,
+};
+
+/* Register the descriptor above under the class name "vdpa". */
+RTE_REGISTER_CLASS(vdpa, rte_class_vdpa);