+ uint16_t i;
+ uint16_t resubmit_num = 0, last_io, num;
+ struct vring_used *used = vq->used;
+ struct rte_vhost_resubmit_info *resubmit;
+ struct rte_vhost_inflight_info_split *inflight_split;
+
+ if (!(dev->protocol_features &
+ (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)))
+ return RTE_VHOST_MSG_RESULT_OK;
+
+	/* The frontend may still not support the inflight feature
+	 * even though the protocol feature was negotiated.
+	 */
+	if (!vq->inflight_split)
+ return RTE_VHOST_MSG_RESULT_OK;
+
+ if (!vq->inflight_split->version) {
+ vq->inflight_split->version = INFLIGHT_VERSION;
+ return RTE_VHOST_MSG_RESULT_OK;
+ }
+
+ if (vq->resubmit_inflight)
+ return RTE_VHOST_MSG_RESULT_OK;
+
+ inflight_split = vq->inflight_split;
+ vq->global_counter = 0;
+ last_io = inflight_split->last_inflight_io;
+
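+	/*
+	 * If the backend stopped after the used ring was updated but
+	 * before the inflight shadow used_idx was refreshed, the last
+	 * in-flight entry has actually completed: clear its inflight
+	 * flag and resynchronize the shadow index.
+	 */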
+ if (inflight_split->used_idx != used->idx) {
+ inflight_split->desc[last_io].inflight = 0;
+ rte_smp_mb();
+ inflight_split->used_idx = used->idx;
+ }
+
+ for (i = 0; i < inflight_split->desc_num; i++) {
+ if (inflight_split->desc[i].inflight == 1)
+ resubmit_num++;
+ }
+
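+	/*
+	 * Descriptors still marked in flight were already consumed from
+	 * the avail ring before the disconnect; advance last_avail_idx
+	 * past them so they are not fetched a second time.
+	 */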
+ vq->last_avail_idx += resubmit_num;
+
+ if (resubmit_num) {
+ resubmit = calloc(1, sizeof(struct rte_vhost_resubmit_info));
+ if (!resubmit) {
+ VHOST_LOG_CONFIG(ERR,
+ "failed to allocate memory for resubmit info.\n");
+ return RTE_VHOST_MSG_RESULT_ERR;
+ }
+
+ resubmit->resubmit_list = calloc(resubmit_num,
+ sizeof(struct rte_vhost_resubmit_desc));
+ if (!resubmit->resubmit_list) {
+ VHOST_LOG_CONFIG(ERR,
+ "failed to allocate memory for inflight desc.\n");
+ free(resubmit);
+ return RTE_VHOST_MSG_RESULT_ERR;
+ }
+
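+		/*
+		 * Record each descriptor still marked in flight so the
+		 * application can resubmit it after reconnection.
+		 */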
+ num = 0;
+		for (i = 0; i < inflight_split->desc_num; i++) {
+			if (inflight_split->desc[i].inflight == 1) {
+ resubmit->resubmit_list[num].index = i;
+ resubmit->resubmit_list[num].counter =
+ inflight_split->desc[i].counter;
+ num++;
+ }
+ }
+ resubmit->resubmit_num = num;
+
+ if (resubmit->resubmit_num > 1)
+ qsort(resubmit->resubmit_list, resubmit->resubmit_num,
+ sizeof(struct rte_vhost_resubmit_desc),
+ resubmit_desc_compare);
+
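+		/*
+		 * resubmit_desc_compare is assumed to order entries by
+		 * counter, most recent first, so entry 0 carries the
+		 * highest counter; resume the global counter just past it.
+		 */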
+ vq->global_counter = resubmit->resubmit_list[0].counter + 1;
+ vq->resubmit_inflight = resubmit;
+ }
+
+ return RTE_VHOST_MSG_RESULT_OK;
+}
+
+static int
+vhost_check_queue_inflights_packed(struct virtio_net *dev,
+ struct vhost_virtqueue *vq)
+{
+ uint16_t i;
+ uint16_t resubmit_num = 0, old_used_idx, num;
+ struct rte_vhost_resubmit_info *resubmit;
+ struct rte_vhost_inflight_info_packed *inflight_packed;
+
+ if (!(dev->protocol_features &
+ (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)))
+ return RTE_VHOST_MSG_RESULT_OK;
+
+	/* The frontend may still not support the inflight feature
+	 * even though the protocol feature was negotiated.
+	 */
+	if (!vq->inflight_packed)
+ return RTE_VHOST_MSG_RESULT_OK;
+
+ if (!vq->inflight_packed->version) {
+ vq->inflight_packed->version = INFLIGHT_VERSION;
+ return RTE_VHOST_MSG_RESULT_OK;
+ }
+
+ if (vq->resubmit_inflight)
+ return RTE_VHOST_MSG_RESULT_OK;
+
+ inflight_packed = vq->inflight_packed;
+ vq->global_counter = 0;
+ old_used_idx = inflight_packed->old_used_idx;
+
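+	/*
+	 * Decide whether the last used-ring update completed: if the
+	 * descriptor at old_used_idx is no longer in flight, commit the
+	 * new snapshot of the ring state; otherwise roll back to the
+	 * old snapshot.
+	 */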
+ if (inflight_packed->used_idx != old_used_idx) {
+ if (inflight_packed->desc[old_used_idx].inflight == 0) {
+ inflight_packed->old_used_idx =
+ inflight_packed->used_idx;
+ inflight_packed->old_used_wrap_counter =
+ inflight_packed->used_wrap_counter;
+ inflight_packed->old_free_head =
+ inflight_packed->free_head;
+ } else {
+ inflight_packed->used_idx =
+ inflight_packed->old_used_idx;
+ inflight_packed->used_wrap_counter =
+ inflight_packed->old_used_wrap_counter;
+ inflight_packed->free_head =
+ inflight_packed->old_free_head;
+ }
+ }
+
+ for (i = 0; i < inflight_packed->desc_num; i++) {
+ if (inflight_packed->desc[i].inflight == 1)
+ resubmit_num++;
+ }
+
+ if (resubmit_num) {
+ resubmit = calloc(1, sizeof(struct rte_vhost_resubmit_info));
+ if (resubmit == NULL) {
+ VHOST_LOG_CONFIG(ERR,
+ "failed to allocate memory for resubmit info.\n");
+ return RTE_VHOST_MSG_RESULT_ERR;
+ }
+
+ resubmit->resubmit_list = calloc(resubmit_num,
+ sizeof(struct rte_vhost_resubmit_desc));
+ if (resubmit->resubmit_list == NULL) {
+ VHOST_LOG_CONFIG(ERR,
+ "failed to allocate memory for resubmit desc.\n");
+ free(resubmit);
+ return RTE_VHOST_MSG_RESULT_ERR;
+ }
+
+ num = 0;
+ for (i = 0; i < inflight_packed->desc_num; i++) {
+			if (inflight_packed->desc[i].inflight == 1) {
+ resubmit->resubmit_list[num].index = i;
+ resubmit->resubmit_list[num].counter =
+ inflight_packed->desc[i].counter;
+ num++;
+ }
+ }
+ resubmit->resubmit_num = num;
+
+ if (resubmit->resubmit_num > 1)
+ qsort(resubmit->resubmit_list, resubmit->resubmit_num,
+ sizeof(struct rte_vhost_resubmit_desc),
+ resubmit_desc_compare);
+
+ vq->global_counter = resubmit->resubmit_list[0].counter + 1;
+ vq->resubmit_inflight = resubmit;
+ }
+
+ return RTE_VHOST_MSG_RESULT_OK;
+}
+
+static int
+vhost_user_set_vring_kick(struct virtio_net **pdev, struct VhostUserMsg *msg,
+ int main_fd __rte_unused)
+{
+ struct virtio_net *dev = *pdev;
+ struct vhost_vring_file file;
+ struct vhost_virtqueue *vq;
+ int expected_fds;
+
+ expected_fds = (msg->payload.u64 & VHOST_USER_VRING_NOFD_MASK) ? 0 : 1;
+ if (validate_msg_fds(msg, expected_fds) != 0)
+ return RTE_VHOST_MSG_RESULT_ERR;
+
+ file.index = msg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
+ if (msg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)
+ file.fd = VIRTIO_INVALID_EVENTFD;
+ else
+ file.fd = msg->fds[0];
+ VHOST_LOG_CONFIG(INFO,
+ "vring kick idx:%d file:%d\n", file.index, file.fd);
+
+ /* Interpret ring addresses only when ring is started. */
+ dev = translate_ring_addresses(dev, file.index);
+ if (!dev) {
+ if (file.fd != VIRTIO_INVALID_EVENTFD)
+ close(file.fd);
+
+ return RTE_VHOST_MSG_RESULT_ERR;
+ }
+
+ *pdev = dev;
+
+ vq = dev->virtqueue[file.index];
+
+ /*
+ * When VHOST_USER_F_PROTOCOL_FEATURES is not negotiated,
+ * the ring starts already enabled. Otherwise, it is enabled via
+ * the SET_VRING_ENABLE message.
+ */
+ if (!(dev->features & (1ULL << VHOST_USER_F_PROTOCOL_FEATURES))) {
+ vq->enabled = 1;
+ if (dev->notify_ops->vring_state_changed)
+ dev->notify_ops->vring_state_changed(
+ dev->vid, file.index, 1);
+ }
+
+ if (vq->ready) {
+ vq->ready = 0;
+ vhost_user_notify_queue_state(dev, file.index, 0);
+ }
+
+ if (vq->kickfd >= 0)
+ close(vq->kickfd);
+ vq->kickfd = file.fd;
+
+ if (vq_is_packed(dev)) {
+ if (vhost_check_queue_inflights_packed(dev, vq)) {
+ VHOST_LOG_CONFIG(ERR,
+ "failed to inflights for vq: %d\n", file.index);
+ return RTE_VHOST_MSG_RESULT_ERR;
+ }
+ } else {
+ if (vhost_check_queue_inflights_split(dev, vq)) {
+ VHOST_LOG_CONFIG(ERR,
+ "failed to inflights for vq: %d\n", file.index);
+ return RTE_VHOST_MSG_RESULT_ERR;
+ }
+ }
+
+ return RTE_VHOST_MSG_RESULT_OK;
+}
+
+/*
+ * When virtio is stopped, QEMU sends us the GET_VRING_BASE message.
+ */
+static int
+vhost_user_get_vring_base(struct virtio_net **pdev,
+ struct VhostUserMsg *msg,
+ int main_fd __rte_unused)
+{
+ struct virtio_net *dev = *pdev;
+ struct vhost_virtqueue *vq = dev->virtqueue[msg->payload.state.index];
+ uint64_t val;
+
+ if (validate_msg_fds(msg, 0) != 0)
+ return RTE_VHOST_MSG_RESULT_ERR;
+
+ /* We have to stop the queue (virtio) if it is running. */
+ vhost_destroy_device_notify(dev);
+
+ dev->flags &= ~VIRTIO_DEV_READY;
+ dev->flags &= ~VIRTIO_DEV_VDPA_CONFIGURED;
+
+ /* Here we are safe to get the indexes */
+ if (vq_is_packed(dev)) {
+ /*
+ * Bit[0:14]: avail index
+ * Bit[15]: avail wrap counter
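+		 * E.g. an avail index of 0x0234 with the wrap counter
+		 * set is reported as 0x8234.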
+ */
+ val = vq->last_avail_idx & 0x7fff;
+ val |= vq->avail_wrap_counter << 15;
+ msg->payload.state.num = val;
+ } else {
+ msg->payload.state.num = vq->last_avail_idx;
+ }
+
+ VHOST_LOG_CONFIG(INFO,
+ "vring base idx:%d file:%d\n", msg->payload.state.index,
+ msg->payload.state.num);
+ /*
+	 * Based on the current QEMU vhost-user implementation, this
+	 * message is only ever sent from vhost_vring_stop.
+	 * TODO: clean up the vring; it isn't usable from this point on.
+ */
+ if (vq->kickfd >= 0)
+ close(vq->kickfd);
+
+ vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
+
+ if (vq->callfd >= 0)
+ close(vq->callfd);
+
+ vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD;
+
+ vq->signalled_used_valid = false;
+
+ if (vq_is_packed(dev)) {
+ rte_free(vq->shadow_used_packed);
+ vq->shadow_used_packed = NULL;
+ } else {
+ rte_free(vq->shadow_used_split);
+ vq->shadow_used_split = NULL;
+		/* rte_free() is a no-op on NULL, so no need to check first. */
+		rte_free(vq->async_pkts_pending);
+		vq->async_pkts_pending = NULL;
+		rte_free(vq->async_pkts_info);
+		vq->async_pkts_info = NULL;
+ }
+
+ rte_free(vq->batch_copy_elems);
+ vq->batch_copy_elems = NULL;
+
+ msg->size = sizeof(msg->payload.state);
+ msg->fd_num = 0;
+
+ vring_invalidate(dev, vq);
+
+ return RTE_VHOST_MSG_RESULT_REPLY;
+}
+
+/*
+ * When the virtio queues are ready to work, QEMU sends us a message
+ * to enable the virtio queue pair.
+ */
+static int
+vhost_user_set_vring_enable(struct virtio_net **pdev,
+ struct VhostUserMsg *msg,
+ int main_fd __rte_unused)
+{
+ struct virtio_net *dev = *pdev;
+ int enable = (int)msg->payload.state.num;
+ int index = (int)msg->payload.state.index;
+
+ if (validate_msg_fds(msg, 0) != 0)
+ return RTE_VHOST_MSG_RESULT_ERR;
+
+ VHOST_LOG_CONFIG(INFO,
+ "set queue enable: %d to qp idx: %d\n",
+ enable, index);
+
+ if (enable && dev->virtqueue[index]->async_registered) {
+ if (dev->virtqueue[index]->async_pkts_inflight_n) {
+ VHOST_LOG_CONFIG(ERR, "failed to enable vring. "
+ "async inflight packets must be completed first\n");
+ return RTE_VHOST_MSG_RESULT_ERR;
+ }
+ }
+
+ dev->virtqueue[index]->enabled = enable;
+
+ return RTE_VHOST_MSG_RESULT_OK;
+}
+
+static int
+vhost_user_get_protocol_features(struct virtio_net **pdev,
+ struct VhostUserMsg *msg,
+ int main_fd __rte_unused)
+{
+ struct virtio_net *dev = *pdev;
+ uint64_t features, protocol_features;
+
+ if (validate_msg_fds(msg, 0) != 0)
+ return RTE_VHOST_MSG_RESULT_ERR;
+
+ rte_vhost_driver_get_features(dev->ifname, &features);
+ rte_vhost_driver_get_protocol_features(dev->ifname, &protocol_features);
+
+ msg->payload.u64 = protocol_features;
+ msg->size = sizeof(msg->payload.u64);
+ msg->fd_num = 0;
+
+ return RTE_VHOST_MSG_RESULT_REPLY;
+}
+
+static int
+vhost_user_set_protocol_features(struct virtio_net **pdev,
+ struct VhostUserMsg *msg,
+ int main_fd __rte_unused)
+{
+ struct virtio_net *dev = *pdev;
+ uint64_t protocol_features = msg->payload.u64;
+ uint64_t slave_protocol_features = 0;
+
+ if (validate_msg_fds(msg, 0) != 0)
+ return RTE_VHOST_MSG_RESULT_ERR;
+
+ rte_vhost_driver_get_protocol_features(dev->ifname,
+ &slave_protocol_features);
+ if (protocol_features & ~slave_protocol_features) {
+ VHOST_LOG_CONFIG(ERR,
+ "(%d) received invalid protocol features.\n",
+ dev->vid);
+ return RTE_VHOST_MSG_RESULT_ERR;
+ }
+
+ dev->protocol_features = protocol_features;
+ VHOST_LOG_CONFIG(INFO,
+ "negotiated Vhost-user protocol features: 0x%" PRIx64 "\n",
+ dev->protocol_features);
+
+ return RTE_VHOST_MSG_RESULT_OK;
+}
+
+static int
+vhost_user_set_log_base(struct virtio_net **pdev, struct VhostUserMsg *msg,
+ int main_fd __rte_unused)
+{
+ struct virtio_net *dev = *pdev;
+ int fd = msg->fds[0];
+ uint64_t size, off;
+ void *addr;
+
+ if (validate_msg_fds(msg, 1) != 0)
+ return RTE_VHOST_MSG_RESULT_ERR;
+
+ if (fd < 0) {
+ VHOST_LOG_CONFIG(ERR, "invalid log fd: %d\n", fd);
+ return RTE_VHOST_MSG_RESULT_ERR;
+ }
+
+ if (msg->size != sizeof(VhostUserLog)) {
+ VHOST_LOG_CONFIG(ERR,
+ "invalid log base msg size: %"PRId32" != %d\n",
+ msg->size, (int)sizeof(VhostUserLog));
+ goto close_msg_fds;
+ }
+
+ size = msg->payload.log.mmap_size;
+ off = msg->payload.log.mmap_offset;
+
+ /* Check for mmap size and offset overflow. */
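+	/*
+	 * In unsigned arithmetic -size equals UINT64_MAX - size + 1, so
+	 * the test below holds exactly when off + size would wrap around.
+	 */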
+ if (off >= -size) {
+ VHOST_LOG_CONFIG(ERR,
+ "log offset %#"PRIx64" and log size %#"PRIx64" overflow\n",
+ off, size);
+ goto close_msg_fds;
+ }
+
+ VHOST_LOG_CONFIG(INFO,
+ "log mmap size: %"PRId64", offset: %"PRId64"\n",
+ size, off);
+
+ /*
+	 * mmap from 0 to work around a hugepage mmap bug: mmap will
+	 * fail when the offset is not page size aligned.
+ */
+ addr = mmap(0, size + off, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ close(fd);
+ if (addr == MAP_FAILED) {
+ VHOST_LOG_CONFIG(ERR, "mmap log base failed!\n");
+ return RTE_VHOST_MSG_RESULT_ERR;
+ }
+
+ /*
+	 * Free any previously mapped log memory, as multiple
+	 * VHOST_USER_SET_LOG_BASE messages may occasionally be received.
+ */
+ if (dev->log_addr) {
+ munmap((void *)(uintptr_t)dev->log_addr, dev->log_size);
+ }
+ dev->log_addr = (uint64_t)(uintptr_t)addr;
+ dev->log_base = dev->log_addr + off;
+ dev->log_size = size;
+
+ /*
+ * The spec is not clear about it (yet), but QEMU doesn't expect
+ * any payload in the reply.
+ */
+ msg->size = 0;
+ msg->fd_num = 0;
+
+ return RTE_VHOST_MSG_RESULT_REPLY;
+
+close_msg_fds:
+ close_msg_fds(msg);
+ return RTE_VHOST_MSG_RESULT_ERR;
+}
+
+static int vhost_user_set_log_fd(struct virtio_net **pdev __rte_unused,
+ struct VhostUserMsg *msg,
+ int main_fd __rte_unused)
+{
+ if (validate_msg_fds(msg, 1) != 0)
+ return RTE_VHOST_MSG_RESULT_ERR;
+
+ close(msg->fds[0]);
+ VHOST_LOG_CONFIG(INFO, "not implemented.\n");
+
+ return RTE_VHOST_MSG_RESULT_OK;
+}
+
+/*
+ * A RARP packet is constructed and broadcast to notify switches of
+ * the new location of the migrated VM, so that packets from outside
+ * are not lost after migration.
+ *
+ * However, we don't actually "send" a RARP packet here; instead, we
+ * set the 'broadcast_rarp' flag to let rte_vhost_dequeue_burst()
+ * inject it.
+ */
+static int
+vhost_user_send_rarp(struct virtio_net **pdev, struct VhostUserMsg *msg,
+ int main_fd __rte_unused)
+{
+ struct virtio_net *dev = *pdev;
+ uint8_t *mac = (uint8_t *)&msg->payload.u64;
+ struct rte_vdpa_device *vdpa_dev;
+
+ if (validate_msg_fds(msg, 0) != 0)
+ return RTE_VHOST_MSG_RESULT_ERR;
+
+ VHOST_LOG_CONFIG(DEBUG,
+ ":: mac: %02x:%02x:%02x:%02x:%02x:%02x\n",
+ mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
+	memcpy(dev->mac.addr_bytes, mac, RTE_ETHER_ADDR_LEN);
+
+ /*
+ * Set the flag to inject a RARP broadcast packet at
+ * rte_vhost_dequeue_burst().
+ *
+ * __ATOMIC_RELEASE ordering is for making sure the mac is
+ * copied before the flag is set.
+ */
+ __atomic_store_n(&dev->broadcast_rarp, 1, __ATOMIC_RELEASE);
+ vdpa_dev = dev->vdpa_dev;
+ if (vdpa_dev && vdpa_dev->ops->migration_done)
+ vdpa_dev->ops->migration_done(dev->vid);
+
+ return RTE_VHOST_MSG_RESULT_OK;
+}
+
+static int
+vhost_user_net_set_mtu(struct virtio_net **pdev, struct VhostUserMsg *msg,
+ int main_fd __rte_unused)
+{
+ struct virtio_net *dev = *pdev;
+
+ if (validate_msg_fds(msg, 0) != 0)
+ return RTE_VHOST_MSG_RESULT_ERR;
+
+ if (msg->payload.u64 < VIRTIO_MIN_MTU ||
+ msg->payload.u64 > VIRTIO_MAX_MTU) {
+ VHOST_LOG_CONFIG(ERR, "Invalid MTU size (%"PRIu64")\n",
+ msg->payload.u64);
+
+ return RTE_VHOST_MSG_RESULT_ERR;
+ }
+
+ dev->mtu = msg->payload.u64;
+
+ return RTE_VHOST_MSG_RESULT_OK;
+}
+
+static int
+vhost_user_set_req_fd(struct virtio_net **pdev, struct VhostUserMsg *msg,
+ int main_fd __rte_unused)
+{
+ struct virtio_net *dev = *pdev;
+ int fd = msg->fds[0];
+
+ if (validate_msg_fds(msg, 1) != 0)
+ return RTE_VHOST_MSG_RESULT_ERR;
+
+ if (fd < 0) {
+ VHOST_LOG_CONFIG(ERR,
+ "Invalid file descriptor for slave channel (%d)\n",
+ fd);
+ return RTE_VHOST_MSG_RESULT_ERR;
+ }
+
+ if (dev->slave_req_fd >= 0)
+ close(dev->slave_req_fd);
+
+ dev->slave_req_fd = fd;
+
+ return RTE_VHOST_MSG_RESULT_OK;
+}
+
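+/*
+ * Report whether the IOTLB message range [iova, iova + size) overlaps
+ * memory used by the split ring. Two half-open ranges [a, a + alen)
+ * and [b, b + blen) intersect iff a < b + blen and b < a + alen,
+ * which is the test applied to each ring structure below.
+ */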
+static int
+is_vring_iotlb_split(struct vhost_virtqueue *vq, struct vhost_iotlb_msg *imsg)
+{
+ struct vhost_vring_addr *ra;
+ uint64_t start, end, len;
+
+ start = imsg->iova;
+ end = start + imsg->size;
+
+ ra = &vq->ring_addrs;
+ len = sizeof(struct vring_desc) * vq->size;
+ if (ra->desc_user_addr < end && (ra->desc_user_addr + len) > start)
+ return 1;
+
+ len = sizeof(struct vring_avail) + sizeof(uint16_t) * vq->size;
+ if (ra->avail_user_addr < end && (ra->avail_user_addr + len) > start)
+ return 1;
+
+ len = sizeof(struct vring_used) +
+ sizeof(struct vring_used_elem) * vq->size;
+ if (ra->used_user_addr < end && (ra->used_user_addr + len) > start)
+ return 1;
+
+ if (ra->flags & (1 << VHOST_VRING_F_LOG)) {
+ len = sizeof(uint64_t);
+ if (ra->log_guest_addr < end &&
+ (ra->log_guest_addr + len) > start)
+ return 1;
+ }
+
+ return 0;
+}
+
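+/*
+ * Same overlap test for the packed ring layout, where the driver and
+ * device event suppression structures take the place of the split
+ * avail and used rings.
+ */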
+static int
+is_vring_iotlb_packed(struct vhost_virtqueue *vq, struct vhost_iotlb_msg *imsg)
+{
+ struct vhost_vring_addr *ra;
+ uint64_t start, end, len;
+
+ start = imsg->iova;
+ end = start + imsg->size;
+
+ ra = &vq->ring_addrs;
+ len = sizeof(struct vring_packed_desc) * vq->size;
+ if (ra->desc_user_addr < end && (ra->desc_user_addr + len) > start)
+ return 1;
+
+ len = sizeof(struct vring_packed_desc_event);
+ if (ra->avail_user_addr < end && (ra->avail_user_addr + len) > start)
+ return 1;
+
+ len = sizeof(struct vring_packed_desc_event);
+ if (ra->used_user_addr < end && (ra->used_user_addr + len) > start)
+ return 1;
+
+ if (ra->flags & (1 << VHOST_VRING_F_LOG)) {
+ len = sizeof(uint64_t);
+ if (ra->log_guest_addr < end &&
+ (ra->log_guest_addr + len) > start)
+ return 1;
+ }
+
+ return 0;
+}
+
+static int is_vring_iotlb(struct virtio_net *dev,
+ struct vhost_virtqueue *vq,
+ struct vhost_iotlb_msg *imsg)
+{
+ if (vq_is_packed(dev))
+ return is_vring_iotlb_packed(vq, imsg);
+ else
+ return is_vring_iotlb_split(vq, imsg);
+}
+
+static int
+vhost_user_iotlb_msg(struct virtio_net **pdev, struct VhostUserMsg *msg,
+ int main_fd __rte_unused)
+{
+ struct virtio_net *dev = *pdev;
+ struct vhost_iotlb_msg *imsg = &msg->payload.iotlb;
+ uint16_t i;
+ uint64_t vva, len;
+
+ if (validate_msg_fds(msg, 0) != 0)
+ return RTE_VHOST_MSG_RESULT_ERR;
+
+ switch (imsg->type) {
+ case VHOST_IOTLB_UPDATE:
+ len = imsg->size;
+ vva = qva_to_vva(dev, imsg->uaddr, &len);
+ if (!vva)
+ return RTE_VHOST_MSG_RESULT_ERR;
+
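+		/*
+		 * Insert the new mapping into every virtqueue's IOTLB
+		 * cache, and retranslate the ring addresses of any ring
+		 * that lives in the updated range.
+		 */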
+ for (i = 0; i < dev->nr_vring; i++) {
+ struct vhost_virtqueue *vq = dev->virtqueue[i];
+
+ if (!vq)
+ continue;
+
+ vhost_user_iotlb_cache_insert(vq, imsg->iova, vva,
+ len, imsg->perm);
+
+ if (is_vring_iotlb(dev, vq, imsg))
+ *pdev = dev = translate_ring_addresses(dev, i);
+ }
+ break;
+ case VHOST_IOTLB_INVALIDATE:
+ for (i = 0; i < dev->nr_vring; i++) {
+ struct vhost_virtqueue *vq = dev->virtqueue[i];
+
+ if (!vq)
+ continue;
+
+ vhost_user_iotlb_cache_remove(vq, imsg->iova,
+ imsg->size);
+
+ if (is_vring_iotlb(dev, vq, imsg))
+ vring_invalidate(dev, vq);
+ }
+ break;
+ default:
+ VHOST_LOG_CONFIG(ERR, "Invalid IOTLB message type (%d)\n",
+ imsg->type);
+ return RTE_VHOST_MSG_RESULT_ERR;
+ }
+
+ return RTE_VHOST_MSG_RESULT_OK;
+}
+
+static int
+vhost_user_set_postcopy_advise(struct virtio_net **pdev,
+ struct VhostUserMsg *msg,
+ int main_fd __rte_unused)
+{
+ struct virtio_net *dev = *pdev;
+#ifdef RTE_LIBRTE_VHOST_POSTCOPY
+ struct uffdio_api api_struct;
+
+ if (validate_msg_fds(msg, 0) != 0)
+ return RTE_VHOST_MSG_RESULT_ERR;
+
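+	/*
+	 * Create a userfaultfd and hand it back to the frontend, which
+	 * monitors it to service our page faults during postcopy live
+	 * migration.
+	 */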
+ dev->postcopy_ufd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
+
+ if (dev->postcopy_ufd == -1) {
+ VHOST_LOG_CONFIG(ERR, "Userfaultfd not available: %s\n",
+ strerror(errno));
+ return RTE_VHOST_MSG_RESULT_ERR;
+ }
+ api_struct.api = UFFD_API;
+ api_struct.features = 0;
+ if (ioctl(dev->postcopy_ufd, UFFDIO_API, &api_struct)) {
+ VHOST_LOG_CONFIG(ERR, "UFFDIO_API ioctl failure: %s\n",
+ strerror(errno));
+ close(dev->postcopy_ufd);
+ dev->postcopy_ufd = -1;
+ return RTE_VHOST_MSG_RESULT_ERR;
+ }
+ msg->fds[0] = dev->postcopy_ufd;
+ msg->fd_num = 1;
+
+ return RTE_VHOST_MSG_RESULT_REPLY;
+#else
+ dev->postcopy_ufd = -1;
+ msg->fd_num = 0;
+
+ return RTE_VHOST_MSG_RESULT_ERR;
+#endif
+}
+
+static int
+vhost_user_set_postcopy_listen(struct virtio_net **pdev,
+			       struct VhostUserMsg *msg,
+ int main_fd __rte_unused)
+{
+ struct virtio_net *dev = *pdev;
+
+ if (validate_msg_fds(msg, 0) != 0)
+ return RTE_VHOST_MSG_RESULT_ERR;
+
+ if (dev->mem && dev->mem->nregions) {
+ VHOST_LOG_CONFIG(ERR,
+ "Regions already registered at postcopy-listen\n");
+ return RTE_VHOST_MSG_RESULT_ERR;
+ }
+ dev->postcopy_listening = 1;
+
+ return RTE_VHOST_MSG_RESULT_OK;
+}
+
+static int
+vhost_user_postcopy_end(struct virtio_net **pdev, struct VhostUserMsg *msg,
+ int main_fd __rte_unused)
+{
+ struct virtio_net *dev = *pdev;
+
+ if (validate_msg_fds(msg, 0) != 0)
+ return RTE_VHOST_MSG_RESULT_ERR;
+
+ dev->postcopy_listening = 0;
+ if (dev->postcopy_ufd >= 0) {
+ close(dev->postcopy_ufd);
+ dev->postcopy_ufd = -1;
+ }
+
+ msg->payload.u64 = 0;
+ msg->size = sizeof(msg->payload.u64);
+ msg->fd_num = 0;
+
+ return RTE_VHOST_MSG_RESULT_REPLY;
+}
+
+static int
+vhost_user_get_status(struct virtio_net **pdev, struct VhostUserMsg *msg,
+ int main_fd __rte_unused)
+{
+ struct virtio_net *dev = *pdev;
+
+ if (validate_msg_fds(msg, 0) != 0)
+ return RTE_VHOST_MSG_RESULT_ERR;
+
+ msg->payload.u64 = dev->status;
+ msg->size = sizeof(msg->payload.u64);
+ msg->fd_num = 0;
+
+ return RTE_VHOST_MSG_RESULT_REPLY;
+}
+
+static int
+vhost_user_set_status(struct virtio_net **pdev, struct VhostUserMsg *msg,
+ int main_fd __rte_unused)
+{
+ struct virtio_net *dev = *pdev;
+
+ if (validate_msg_fds(msg, 0) != 0)
+ return RTE_VHOST_MSG_RESULT_ERR;
+
+ /* As per Virtio specification, the device status is 8bits long */
+ if (msg->payload.u64 > UINT8_MAX) {
+ VHOST_LOG_CONFIG(ERR, "Invalid VHOST_USER_SET_STATUS payload 0x%" PRIx64 "\n",
+ msg->payload.u64);
+ return RTE_VHOST_MSG_RESULT_ERR;
+ }
+
+ dev->status = msg->payload.u64;
+
+ if ((dev->status & VIRTIO_DEVICE_STATUS_FEATURES_OK) &&
+ (dev->flags & VIRTIO_DEV_FEATURES_FAILED)) {
+ VHOST_LOG_CONFIG(ERR, "FEATURES_OK bit is set but feature negotiation failed\n");
+ /*
+ * Clear the bit to let the driver know about the feature
+ * negotiation failure
+ */
+ dev->status &= ~VIRTIO_DEVICE_STATUS_FEATURES_OK;
+ }
+
+ VHOST_LOG_CONFIG(INFO, "New device status(0x%08x):\n"
+ "\t-RESET: %u\n"
+ "\t-ACKNOWLEDGE: %u\n"
+ "\t-DRIVER: %u\n"
+ "\t-FEATURES_OK: %u\n"
+ "\t-DRIVER_OK: %u\n"
+ "\t-DEVICE_NEED_RESET: %u\n"
+ "\t-FAILED: %u\n",
+ dev->status,
+ (dev->status == VIRTIO_DEVICE_STATUS_RESET),
+ !!(dev->status & VIRTIO_DEVICE_STATUS_ACK),
+ !!(dev->status & VIRTIO_DEVICE_STATUS_DRIVER),
+ !!(dev->status & VIRTIO_DEVICE_STATUS_FEATURES_OK),
+ !!(dev->status & VIRTIO_DEVICE_STATUS_DRIVER_OK),
+ !!(dev->status & VIRTIO_DEVICE_STATUS_DEV_NEED_RESET),
+ !!(dev->status & VIRTIO_DEVICE_STATUS_FAILED));
+
+ return RTE_VHOST_MSG_RESULT_OK;
+}
+
+typedef int (*vhost_message_handler_t)(struct virtio_net **pdev,
+ struct VhostUserMsg *msg,
+ int main_fd);
+static vhost_message_handler_t vhost_message_handlers[VHOST_USER_MAX] = {
+ [VHOST_USER_NONE] = NULL,
+ [VHOST_USER_GET_FEATURES] = vhost_user_get_features,
+ [VHOST_USER_SET_FEATURES] = vhost_user_set_features,
+ [VHOST_USER_SET_OWNER] = vhost_user_set_owner,
+ [VHOST_USER_RESET_OWNER] = vhost_user_reset_owner,
+ [VHOST_USER_SET_MEM_TABLE] = vhost_user_set_mem_table,
+ [VHOST_USER_SET_LOG_BASE] = vhost_user_set_log_base,
+ [VHOST_USER_SET_LOG_FD] = vhost_user_set_log_fd,
+ [VHOST_USER_SET_VRING_NUM] = vhost_user_set_vring_num,
+ [VHOST_USER_SET_VRING_ADDR] = vhost_user_set_vring_addr,
+ [VHOST_USER_SET_VRING_BASE] = vhost_user_set_vring_base,
+ [VHOST_USER_GET_VRING_BASE] = vhost_user_get_vring_base,
+ [VHOST_USER_SET_VRING_KICK] = vhost_user_set_vring_kick,
+ [VHOST_USER_SET_VRING_CALL] = vhost_user_set_vring_call,
+ [VHOST_USER_SET_VRING_ERR] = vhost_user_set_vring_err,
+ [VHOST_USER_GET_PROTOCOL_FEATURES] = vhost_user_get_protocol_features,
+ [VHOST_USER_SET_PROTOCOL_FEATURES] = vhost_user_set_protocol_features,
+ [VHOST_USER_GET_QUEUE_NUM] = vhost_user_get_queue_num,
+ [VHOST_USER_SET_VRING_ENABLE] = vhost_user_set_vring_enable,
+ [VHOST_USER_SEND_RARP] = vhost_user_send_rarp,
+ [VHOST_USER_NET_SET_MTU] = vhost_user_net_set_mtu,
+ [VHOST_USER_SET_SLAVE_REQ_FD] = vhost_user_set_req_fd,
+ [VHOST_USER_IOTLB_MSG] = vhost_user_iotlb_msg,
+ [VHOST_USER_POSTCOPY_ADVISE] = vhost_user_set_postcopy_advise,
+ [VHOST_USER_POSTCOPY_LISTEN] = vhost_user_set_postcopy_listen,
+ [VHOST_USER_POSTCOPY_END] = vhost_user_postcopy_end,
+ [VHOST_USER_GET_INFLIGHT_FD] = vhost_user_get_inflight_fd,
+ [VHOST_USER_SET_INFLIGHT_FD] = vhost_user_set_inflight_fd,
+ [VHOST_USER_SET_STATUS] = vhost_user_set_status,
+ [VHOST_USER_GET_STATUS] = vhost_user_get_status,
+};
+
+/* Return the number of bytes read on success, or a negative value on failure. */
+static int
+read_vhost_message(int sockfd, struct VhostUserMsg *msg)
+{
+ int ret;
+
+ ret = read_fd_message(sockfd, (char *)msg, VHOST_USER_HDR_SIZE,
+ msg->fds, VHOST_MEMORY_MAX_NREGIONS, &msg->fd_num);
+ if (ret <= 0) {
+ return ret;
+ } else if (ret != VHOST_USER_HDR_SIZE) {
+ VHOST_LOG_CONFIG(ERR, "Unexpected header size read\n");
+ close_msg_fds(msg);
+ return -1;
+ }
+
+	/* On payload read failure, release any fds received with the header. */
+	if (msg->size) {
+		if (msg->size > sizeof(msg->payload)) {
+			VHOST_LOG_CONFIG(ERR,
+				"invalid msg size: %u\n", msg->size);
+			close_msg_fds(msg);
+			return -1;
+		}
+		ret = read(sockfd, &msg->payload, msg->size);
+		if (ret <= 0) {
+			close_msg_fds(msg);
+			return ret;
+		}
+		if (ret != (int)msg->size) {
+			VHOST_LOG_CONFIG(ERR,
+				"read control message failed\n");
+			close_msg_fds(msg);
+			return -1;
+		}
+	}
+
+ return ret;
+}
+
+static int
+send_vhost_message(int sockfd, struct VhostUserMsg *msg)
+{
+ if (!msg)
+ return 0;
+
+ return send_fd_message(sockfd, (char *)msg,
+ VHOST_USER_HDR_SIZE + msg->size, msg->fds, msg->fd_num);
+}
+
+static int
+send_vhost_reply(int sockfd, struct VhostUserMsg *msg)
+{
+ if (!msg)
+ return 0;
+
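+	/* Stamp the header: current version, reply flag set, need-reply cleared. */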
+ msg->flags &= ~VHOST_USER_VERSION_MASK;
+ msg->flags &= ~VHOST_USER_NEED_REPLY;
+ msg->flags |= VHOST_USER_VERSION;
+ msg->flags |= VHOST_USER_REPLY_MASK;
+
+ return send_vhost_message(sockfd, msg);
+}
+
+static int
+send_vhost_slave_message(struct virtio_net *dev, struct VhostUserMsg *msg)
+{
+ int ret;
+
+ if (msg->flags & VHOST_USER_NEED_REPLY)
+ rte_spinlock_lock(&dev->slave_req_lock);
+
+ ret = send_vhost_message(dev->slave_req_fd, msg);
+ if (ret < 0 && (msg->flags & VHOST_USER_NEED_REPLY))
+ rte_spinlock_unlock(&dev->slave_req_lock);
+
+ return ret;
+}
+
+/*
+ * Allocate a queue pair if it hasn't been allocated yet
+ */
+static int
+vhost_user_check_and_alloc_queue_pair(struct virtio_net *dev,
+ struct VhostUserMsg *msg)
+{
+ uint32_t vring_idx;
+
+ switch (msg->request.master) {
+ case VHOST_USER_SET_VRING_KICK:
+ case VHOST_USER_SET_VRING_CALL:
+ case VHOST_USER_SET_VRING_ERR:
+ vring_idx = msg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
+ break;
+ case VHOST_USER_SET_VRING_NUM:
+ case VHOST_USER_SET_VRING_BASE:
+ case VHOST_USER_SET_VRING_ENABLE:
+ vring_idx = msg->payload.state.index;
+ break;
+ case VHOST_USER_SET_VRING_ADDR:
+ vring_idx = msg->payload.addr.index;
+ break;
+ default:
+ return 0;
+ }
+
+ if (vring_idx >= VHOST_MAX_VRING) {
+ VHOST_LOG_CONFIG(ERR,
+ "invalid vring index: %u\n", vring_idx);
+ return -1;
+ }
+
+ if (dev->virtqueue[vring_idx])
+ return 0;
+
+ return alloc_vring_queue(dev, vring_idx);
+}
+
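+/*
+ * The virtqueue array may be sparsely populated, so the helpers below
+ * scan indexes until nr_vring allocated queues have been visited.
+ */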
+static void
+vhost_user_lock_all_queue_pairs(struct virtio_net *dev)
+{
+ unsigned int i = 0;
+ unsigned int vq_num = 0;
+
+ while (vq_num < dev->nr_vring) {
+ struct vhost_virtqueue *vq = dev->virtqueue[i];
+
+ if (vq) {
+ rte_spinlock_lock(&vq->access_lock);
+ vq_num++;
+ }
+ i++;
+ }
+}
+
+static void
+vhost_user_unlock_all_queue_pairs(struct virtio_net *dev)
+{
+ unsigned int i = 0;
+ unsigned int vq_num = 0;
+
+ while (vq_num < dev->nr_vring) {
+ struct vhost_virtqueue *vq = dev->virtqueue[i];
+
+ if (vq) {
+ rte_spinlock_unlock(&vq->access_lock);
+ vq_num++;
+ }
+ i++;
+ }
+}
+
+int
+vhost_user_msg_handler(int vid, int fd)
+{
+ struct virtio_net *dev;
+ struct VhostUserMsg msg;
+ struct rte_vdpa_device *vdpa_dev;