/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2019 Intel Corporation
 */

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <signal.h>
#include <assert.h>
#include <pthread.h>
#include <limits.h>
#include <inttypes.h>
#include <semaphore.h>
#include <linux/virtio_blk.h>
#include <linux/virtio_ring.h>

#include <rte_atomic.h>
#include <rte_cycles.h>
#include <rte_malloc.h>
#include <rte_vhost.h>

#include "vhost_blk.h"

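/* Packed-ring descriptor flag bits (VIRTIO 1.1): F_NEXT chains
 * descriptors, while F_AVAIL and F_USED are compared against the ring
 * wrap counters to tell whether a descriptor is available or used.
 */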
#define VIRTQ_DESC_F_NEXT	1
#define VIRTQ_DESC_F_AVAIL	(1 << 7)
#define VIRTQ_DESC_F_USED	(1 << 15)

#define VHOST_BLK_FEATURES ((1ULL << VIRTIO_F_RING_PACKED) | \
			    (1ULL << VIRTIO_F_VERSION_1) | \
			    (1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | \
			    (1ULL << VHOST_USER_F_PROTOCOL_FEATURES))

struct vhost_blk_ctrlr *g_vhost_ctrlr;

/* Path to the folder where the vhost-user socket file will be created.
 * Can be set by the user.
 */
static char dev_pathname[PATH_MAX] = "";
static sem_t exit_sem;
static int g_should_stop = -1;

struct vhost_blk_ctrlr *
vhost_blk_ctrlr_find(const char *ctrlr_name)
{
	if (ctrlr_name == NULL)
		return NULL;

	/* currently we only support one socket file */
	return g_vhost_ctrlr;
}

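/* Translate a guest physical address to a host virtual address using
 * the controller's memory table; *len is reduced if the requested span
 * crosses the end of the backing region.
 */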
static uint64_t gpa_to_vva(int vid, uint64_t gpa, uint64_t *len)
{
	char path[PATH_MAX];
	struct vhost_blk_ctrlr *ctrlr;
	int ret;

	ret = rte_vhost_get_ifname(vid, path, PATH_MAX);
	if (ret) {
		fprintf(stderr, "Cannot get socket name\n");
		assert(ret == 0);
	}

	ctrlr = vhost_blk_ctrlr_find(path);
	if (!ctrlr) {
		fprintf(stderr, "Controller is not ready\n");
		assert(ctrlr != NULL);
	}

	assert(ctrlr->mem != NULL);

	return rte_vhost_va_from_guest_pa(ctrlr->mem, gpa, len);
}

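/* Walk a packed-ring descriptor chain: when the descriptor at *idx has
 * F_NEXT set, advance *idx and return the next descriptor, otherwise
 * the chain is complete.
 */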
static struct vring_packed_desc *
descriptor_get_next_packed(struct rte_vhost_vring *vq,
			   uint16_t *idx)
{
	if (vq->desc_packed[*idx % vq->size].flags & VIRTQ_DESC_F_NEXT) {
		*idx += 1;
		return &vq->desc_packed[*idx % vq->size];
	}

	return NULL;
}

static bool
descriptor_has_next_packed(struct vring_packed_desc *cur_desc)
{
	return !!(cur_desc->flags & VRING_DESC_F_NEXT);
}

static bool
descriptor_is_wr_packed(struct vring_packed_desc *cur_desc)
{
	return !!(cur_desc->flags & VRING_DESC_F_WRITE);
}

static struct rte_vhost_inflight_desc_packed *
inflight_desc_get_next(struct rte_vhost_inflight_info_packed *inflight_packed,
		       struct rte_vhost_inflight_desc_packed *cur_desc)
{
	if (cur_desc->flags & VIRTQ_DESC_F_NEXT)
		return &inflight_packed->desc[cur_desc->next];

	return NULL;
}

static bool
inflight_desc_has_next(struct rte_vhost_inflight_desc_packed *cur_desc)
{
	return !!(cur_desc->flags & VRING_DESC_F_NEXT);
}

static bool
inflight_desc_is_wr(struct rte_vhost_inflight_desc_packed *cur_desc)
{
	return !!(cur_desc->flags & VRING_DESC_F_WRITE);
}

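/* Rebuild the iovec list for a resubmitted request by translating each
 * inflight descriptor in the chain; the final descriptor holds the
 * one-byte status that is written back to the guest.
 */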
static void
inflight_process_payload_chain_packed(struct inflight_blk_task *task)
{
	void *data;
	uint64_t chunk_len;
	struct vhost_blk_task *blk_task;
	struct rte_vhost_inflight_desc_packed *desc;

	blk_task = &task->blk_task;
	blk_task->iovs_cnt = 0;

	do {
		desc = task->inflight_desc;
		chunk_len = desc->len;
		data = (void *)(uintptr_t)gpa_to_vva(blk_task->bdev->vid,
						     desc->addr,
						     &chunk_len);
		if (!data || chunk_len != desc->len) {
			fprintf(stderr, "failed to translate desc address.\n");
			return;
		}

		blk_task->iovs[blk_task->iovs_cnt].iov_base = data;
		blk_task->iovs[blk_task->iovs_cnt].iov_len = desc->len;
		blk_task->data_len += desc->len;
		blk_task->iovs_cnt++;
		task->inflight_desc = inflight_desc_get_next(
					task->inflight_packed, desc);
	} while (inflight_desc_has_next(task->inflight_desc));

	chunk_len = task->inflight_desc->len;
	blk_task->status = (void *)(uintptr_t)gpa_to_vva(
		blk_task->bdev->vid, task->inflight_desc->addr, &chunk_len);
	if (!blk_task->status || chunk_len != task->inflight_desc->len)
		fprintf(stderr, "failed to translate desc address.\n");
}

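/* Complete a resubmitted request on the packed ring: log the last
 * inflight I/O, publish the used descriptor with AVAIL/USED flags set
 * according to the used wrap counter, clear the inflight slot, and
 * interrupt the guest.
 */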
static void
inflight_submit_completion_packed(struct inflight_blk_task *task,
				  uint32_t q_idx, uint16_t *used_id,
				  bool *used_wrap_counter)
{
	struct vhost_blk_ctrlr *ctrlr;
	struct rte_vhost_vring *vq;
	struct vring_packed_desc *desc;
	int ret;

	ctrlr = vhost_blk_ctrlr_find(dev_pathname);
	vq = task->blk_task.vq;

	ret = rte_vhost_set_last_inflight_io_packed(ctrlr->bdev->vid, q_idx,
						    task->blk_task.head_idx);
	if (ret != 0)
		fprintf(stderr, "failed to set last inflight io\n");

	desc = &vq->desc_packed[*used_id];
	desc->id = task->blk_task.buffer_id;

	if (*used_wrap_counter)
		desc->flags |= VIRTQ_DESC_F_AVAIL | VIRTQ_DESC_F_USED;
	else
		desc->flags &= ~(VIRTQ_DESC_F_AVAIL | VIRTQ_DESC_F_USED);

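	/* Advance past the whole chain: the data descriptors plus the
	 * request header and status descriptors, hence iovs_cnt + 2.
	 */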
	*used_id += task->blk_task.iovs_cnt + 2;
	if (*used_id >= vq->size) {
		*used_id -= vq->size;
		*used_wrap_counter = !(*used_wrap_counter);
	}

	ret = rte_vhost_clr_inflight_desc_packed(ctrlr->bdev->vid, q_idx,
						 task->blk_task.head_idx);
	if (ret != 0)
		fprintf(stderr, "failed to clear inflight io\n");

	/* Send an interrupt back to the guest VM so that it knows
	 * a completion is ready to be processed.
	 */
	rte_vhost_vring_call(task->blk_task.bdev->vid, q_idx);
}

static void
submit_completion_packed(struct vhost_blk_task *task, uint32_t q_idx,
			 uint16_t *used_id, bool *used_wrap_counter)
{
	struct vhost_blk_ctrlr *ctrlr;
	struct rte_vhost_vring *vq;
	struct vring_packed_desc *desc;
	int ret;

	ctrlr = vhost_blk_ctrlr_find(dev_pathname);
	vq = task->vq;

	ret = rte_vhost_set_last_inflight_io_packed(ctrlr->bdev->vid, q_idx,
						    task->inflight_idx);
	if (ret != 0)
		fprintf(stderr, "failed to set last inflight io\n");

	desc = &vq->desc_packed[*used_id];
	desc->id = task->buffer_id;

	if (*used_wrap_counter)
		desc->flags |= VIRTQ_DESC_F_AVAIL | VIRTQ_DESC_F_USED;
	else
		desc->flags &= ~(VIRTQ_DESC_F_AVAIL | VIRTQ_DESC_F_USED);

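	/* As above: data descriptors plus the request header and status
	 * descriptors make up the chain.
	 */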
	*used_id += task->iovs_cnt + 2;
	if (*used_id >= vq->size) {
		*used_id -= vq->size;
		*used_wrap_counter = !(*used_wrap_counter);
	}

	ret = rte_vhost_clr_inflight_desc_packed(ctrlr->bdev->vid, q_idx,
						 task->head_idx);
	if (ret != 0)
		fprintf(stderr, "failed to clear inflight io\n");

	/* Send an interrupt back to the guest VM so that it knows
	 * a completion is ready to be processed.
	 */
	rte_vhost_vring_call(task->bdev->vid, q_idx);
}

static void
vhost_process_payload_chain_packed(struct vhost_blk_task *task,
				   uint16_t *idx)
{
	void *data;
	uint64_t chunk_len;

	task->iovs_cnt = 0;

	do {
		chunk_len = task->desc_packed->len;
		data = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
						     task->desc_packed->addr,
						     &chunk_len);
		if (!data || chunk_len != task->desc_packed->len) {
			fprintf(stderr, "failed to translate desc address.\n");
			return;
		}

		task->iovs[task->iovs_cnt].iov_base = data;
		task->iovs[task->iovs_cnt].iov_len = task->desc_packed->len;
		task->data_len += task->desc_packed->len;
		task->iovs_cnt++;
		task->desc_packed = descriptor_get_next_packed(task->vq, idx);
	} while (descriptor_has_next_packed(task->desc_packed));

	task->last_idx = *idx % task->vq->size;
	chunk_len = task->desc_packed->len;
	task->status = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
						     task->desc_packed->addr,
						     &chunk_len);
	if (!task->status || chunk_len != task->desc_packed->len)
		fprintf(stderr, "failed to translate desc address.\n");
}

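/* A packed descriptor is available when its AVAIL flag matches the
 * driver's wrap counter and its USED flag does not (VIRTIO 1.1 packed
 * ring semantics).
 */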
static bool
descriptor_is_available(struct rte_vhost_vring *vring, uint16_t idx,
			bool avail_wrap_counter)
{
	uint16_t flags = vring->desc_packed[idx].flags;

	return ((!!(flags & VIRTQ_DESC_F_AVAIL) == avail_wrap_counter) &&
		(!!(flags & VIRTQ_DESC_F_USED) != avail_wrap_counter));
}

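/* Poll one packed virtqueue: for each available chain translate the
 * request header, gather the payload, locate the status byte, log the
 * request as inflight, process it, and post the completion.
 */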
static void
process_requestq_packed(struct vhost_blk_ctrlr *ctrlr, uint32_t q_idx)
{
	bool avail_wrap_counter, used_wrap_counter;
	uint16_t avail_idx, used_idx;
	int ret;
	uint64_t chunk_len;
	struct vhost_blk_queue *blk_vq;
	struct rte_vhost_vring *vq;
	struct vhost_blk_task *task;

	blk_vq = &ctrlr->bdev->queues[q_idx];
	vq = &blk_vq->vq;

	avail_idx = blk_vq->last_avail_idx;
	avail_wrap_counter = blk_vq->avail_wrap_counter;
	used_idx = blk_vq->last_used_idx;
	used_wrap_counter = blk_vq->used_wrap_counter;

	task = rte_zmalloc(NULL, sizeof(*task), 0);
	assert(task != NULL);
	task->vq = vq;
	task->bdev = ctrlr->bdev;

	while (descriptor_is_available(vq, avail_idx, avail_wrap_counter)) {
		task->head_idx = avail_idx;
		task->desc_packed = &task->vq->desc_packed[task->head_idx];
		task->iovs_cnt = 0;
		task->data_len = 0;
		task->req = NULL;
		task->status = NULL;

		/* does not support indirect descriptors */
		assert((task->desc_packed->flags & VRING_DESC_F_INDIRECT) == 0);

		chunk_len = task->desc_packed->len;
		task->req = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
			task->desc_packed->addr, &chunk_len);
		if (!task->req || chunk_len != task->desc_packed->len) {
			fprintf(stderr, "failed to translate desc address.\n");
			rte_free(task);
			return;
		}

		task->desc_packed = descriptor_get_next_packed(task->vq,
							       &avail_idx);
		assert(task->desc_packed != NULL);
		if (!descriptor_has_next_packed(task->desc_packed)) {
			task->dxfer_dir = BLK_DIR_NONE;
			task->last_idx = avail_idx % vq->size;
			chunk_len = task->desc_packed->len;
			task->status = (void *)(uintptr_t)
				       gpa_to_vva(task->bdev->vid,
						  task->desc_packed->addr,
						  &chunk_len);
			if (!task->status ||
			    chunk_len != task->desc_packed->len) {
				fprintf(stderr,
					"failed to translate desc address.\n");
				rte_free(task);
				return;
			}
		} else {
			task->readtype = descriptor_is_wr_packed(
							task->desc_packed);
			vhost_process_payload_chain_packed(task, &avail_idx);
		}
		task->buffer_id = vq->desc_packed[task->last_idx].id;
		rte_vhost_set_inflight_desc_packed(ctrlr->bdev->vid, q_idx,
						   task->head_idx,
						   task->last_idx,
						   &task->inflight_idx);

		if (++avail_idx >= vq->size) {
			avail_idx -= vq->size;
			avail_wrap_counter = !avail_wrap_counter;
		}
		blk_vq->last_avail_idx = avail_idx;
		blk_vq->avail_wrap_counter = avail_wrap_counter;

		ret = vhost_bdev_process_blk_commands(ctrlr->bdev, task);
		if (ret) {
			/* invalid response */
			*task->status = VIRTIO_BLK_S_IOERR;
		} else {
			*task->status = VIRTIO_BLK_S_OK;
		}

		submit_completion_packed(task, q_idx, &used_idx,
					 &used_wrap_counter);
		blk_vq->last_used_idx = used_idx;
		blk_vq->used_wrap_counter = used_wrap_counter;
	}

	rte_free(task);
}

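/* Resubmit requests that were still inflight on the packed ring when
 * the previous backend instance went away; their descriptors are
 * recovered from the shared inflight log.
 */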
static void
submit_inflight_vq_packed(struct vhost_blk_ctrlr *ctrlr,
			  uint16_t q_idx)
{
	bool used_wrap_counter;
	int req_idx, ret;
	uint16_t used_idx;
	uint64_t chunk_len;
	struct vhost_blk_queue *blk_vq;
	struct rte_vhost_ring_inflight *inflight_vq;
	struct rte_vhost_resubmit_info *resubmit_info;
	struct rte_vhost_vring *vq;
	struct inflight_blk_task *task;
	struct vhost_blk_task *blk_task;
	struct rte_vhost_inflight_info_packed *inflight_info;

	blk_vq = &ctrlr->bdev->queues[q_idx];
	vq = &blk_vq->vq;
	inflight_vq = &blk_vq->inflight_vq;
	resubmit_info = inflight_vq->resubmit_inflight;
	inflight_info = inflight_vq->inflight_packed;
	used_idx = blk_vq->last_used_idx;
	used_wrap_counter = blk_vq->used_wrap_counter;

	task = rte_malloc(NULL, sizeof(*task), 0);
	if (!task) {
		fprintf(stderr, "failed to allocate memory\n");
		return;
	}
	blk_task = &task->blk_task;
	blk_task->vq = vq;
	blk_task->bdev = ctrlr->bdev;
	task->inflight_packed = inflight_vq->inflight_packed;

	while (resubmit_info->resubmit_num-- > 0) {
		req_idx = resubmit_info->resubmit_num;
		blk_task->head_idx =
			resubmit_info->resubmit_list[req_idx].index;
		task->inflight_desc =
			&inflight_info->desc[blk_task->head_idx];
		task->blk_task.iovs_cnt = 0;
		task->blk_task.data_len = 0;
		task->blk_task.req = NULL;
		task->blk_task.status = NULL;

		/* update the avail idx too
		 * as its initial value equals the used idx
		 */
		blk_vq->last_avail_idx += task->inflight_desc->num;
		if (blk_vq->last_avail_idx >= vq->size) {
			blk_vq->last_avail_idx -= vq->size;
			blk_vq->avail_wrap_counter =
				!blk_vq->avail_wrap_counter;
		}

		/* does not support indirect descriptors */
		assert(task->inflight_desc != NULL);
		assert((task->inflight_desc->flags &
			VRING_DESC_F_INDIRECT) == 0);

		chunk_len = task->inflight_desc->len;
		blk_task->req = (void *)(uintptr_t)
			gpa_to_vva(blk_task->bdev->vid,
				   task->inflight_desc->addr,
				   &chunk_len);
		if (!blk_task->req ||
		    chunk_len != task->inflight_desc->len) {
			fprintf(stderr, "failed to translate desc address.\n");
			rte_free(task);
			return;
		}

		task->inflight_desc = inflight_desc_get_next(
				task->inflight_packed, task->inflight_desc);
		assert(task->inflight_desc != NULL);
		if (!inflight_desc_has_next(task->inflight_desc)) {
			blk_task->dxfer_dir = BLK_DIR_NONE;
			chunk_len = task->inflight_desc->len;
			blk_task->status = (void *)(uintptr_t)
				gpa_to_vva(blk_task->bdev->vid,
					   task->inflight_desc->addr,
					   &chunk_len);
			if (!blk_task->status ||
			    chunk_len != task->inflight_desc->len) {
				fprintf(stderr,
					"failed to translate desc address.\n");
				rte_free(task);
				return;
			}
		} else {
			blk_task->readtype =
				inflight_desc_is_wr(task->inflight_desc);
			inflight_process_payload_chain_packed(task);
		}

		blk_task->buffer_id = task->inflight_desc->id;

		ret = vhost_bdev_process_blk_commands(ctrlr->bdev, blk_task);
		if (ret) {
			/* invalid response */
			*blk_task->status = VIRTIO_BLK_S_IOERR;
		} else {
			*blk_task->status = VIRTIO_BLK_S_OK;
		}

		inflight_submit_completion_packed(task, q_idx, &used_idx,
						  &used_wrap_counter);

		blk_vq->last_used_idx = used_idx;
		blk_vq->used_wrap_counter = used_wrap_counter;
	}

	rte_free(task);
}

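/* The helpers below implement the same request handling for split
 * virtqueues, where descriptors are chained through the next field of
 * struct vring_desc.
 */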
static struct vring_desc *
descriptor_get_next_split(struct vring_desc *vq_desc,
			  struct vring_desc *cur_desc)
{
	return &vq_desc[cur_desc->next];
}

static bool
descriptor_has_next_split(struct vring_desc *cur_desc)
{
	return !!(cur_desc->flags & VRING_DESC_F_NEXT);
}

static bool
descriptor_is_wr_split(struct vring_desc *cur_desc)
{
	return !!(cur_desc->flags & VRING_DESC_F_WRITE);
}

static void
vhost_process_payload_chain_split(struct vhost_blk_task *task)
{
	void *data;
	uint64_t chunk_len;

	task->iovs_cnt = 0;

	do {
		chunk_len = task->desc_split->len;
		data = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
						     task->desc_split->addr,
						     &chunk_len);
		if (!data || chunk_len != task->desc_split->len) {
			fprintf(stderr, "failed to translate desc address.\n");
			return;
		}

		task->iovs[task->iovs_cnt].iov_base = data;
		task->iovs[task->iovs_cnt].iov_len = task->desc_split->len;
		task->data_len += task->desc_split->len;
		task->iovs_cnt++;
		task->desc_split =
			descriptor_get_next_split(task->vq->desc, task->desc_split);
	} while (descriptor_has_next_split(task->desc_split));

	chunk_len = task->desc_split->len;
	task->status = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
						     task->desc_split->addr,
						     &chunk_len);
	if (!task->status || chunk_len != task->desc_split->len)
		fprintf(stderr, "failed to translate desc address.\n");
}

static void
submit_completion_split(struct vhost_blk_task *task, uint32_t vid,
			uint32_t q_idx)
{
	struct rte_vhost_vring *vq;
	struct vring_used *used;

	vq = task->vq;
	used = vq->used;

	rte_vhost_set_last_inflight_io_split(vid, q_idx, task->req_idx);

	/* Fill out the next entry in the "used" ring. id = the
	 * index of the descriptor that contained the blk request.
	 * len = the total amount of data transferred for the blk
	 * request. We must report the correct len, for variable
	 * length blk CDBs, where we may return less data than
	 * allocated by the guest VM.
	 */
	used->ring[used->idx & (vq->size - 1)].id = task->req_idx;
	used->ring[used->idx & (vq->size - 1)].len = task->data_len;
	rte_smp_mb();
	used->idx++;
	rte_smp_mb();

	rte_vhost_clr_inflight_desc_split(vid, q_idx, used->idx, task->req_idx);

	/* Send an interrupt back to the guest VM so that it knows
	 * a completion is ready to be processed.
	 */
	rte_vhost_vring_call(task->bdev->vid, q_idx);
}

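/* Resubmit inflight requests recorded for a split virtqueue; mirrors
 * submit_inflight_vq_packed() above.
 */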
static void
submit_inflight_vq_split(struct vhost_blk_ctrlr *ctrlr,
			 uint32_t q_idx)
{
	struct vhost_blk_queue *blk_vq;
	struct rte_vhost_ring_inflight *inflight_vq;
	struct rte_vhost_resubmit_info *resubmit_inflight;
	struct rte_vhost_resubmit_desc *resubmit_list;
	struct vhost_blk_task *task;
	int req_idx, ret;
	uint64_t chunk_len;

	blk_vq = &ctrlr->bdev->queues[q_idx];
	inflight_vq = &blk_vq->inflight_vq;
	resubmit_inflight = inflight_vq->resubmit_inflight;
	resubmit_list = resubmit_inflight->resubmit_list;

	task = rte_zmalloc(NULL, sizeof(*task), 0);
	assert(task != NULL);

	task->bdev = ctrlr->bdev;
	task->vq = &blk_vq->vq;

	while (resubmit_inflight->resubmit_num-- > 0) {
		req_idx = resubmit_list[resubmit_inflight->resubmit_num].index;
		task->req_idx = req_idx;
		task->desc_split = &task->vq->desc[task->req_idx];
		task->iovs_cnt = 0;
		task->data_len = 0;
		task->req = NULL;
		task->status = NULL;

		/* does not support indirect descriptors */
		assert(task->desc_split != NULL);
		assert((task->desc_split->flags & VRING_DESC_F_INDIRECT) == 0);

		chunk_len = task->desc_split->len;
		task->req = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
				task->desc_split->addr, &chunk_len);
		if (!task->req || chunk_len != task->desc_split->len) {
			fprintf(stderr, "failed to translate desc address.\n");
			rte_free(task);
			return;
		}

		task->desc_split = descriptor_get_next_split(task->vq->desc,
							     task->desc_split);
		if (!descriptor_has_next_split(task->desc_split)) {
			task->dxfer_dir = BLK_DIR_NONE;
			chunk_len = task->desc_split->len;
			task->status = (void *)(uintptr_t)
				       gpa_to_vva(task->bdev->vid,
						  task->desc_split->addr,
						  &chunk_len);
			if (!task->status ||
			    chunk_len != task->desc_split->len) {
				fprintf(stderr,
					"failed to translate desc address.\n");
				rte_free(task);
				return;
			}
		} else {
			task->readtype =
				descriptor_is_wr_split(task->desc_split);
			vhost_process_payload_chain_split(task);
		}

		ret = vhost_bdev_process_blk_commands(ctrlr->bdev, task);
		if (ret) {
			/* invalid response */
			*task->status = VIRTIO_BLK_S_IOERR;
		} else {
			*task->status = VIRTIO_BLK_S_OK;
		}

		submit_completion_split(task, ctrlr->bdev->vid, q_idx);
	}

	rte_free(task);
}

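/* Poll one split virtqueue: consume new entries from the avail ring,
 * translate each descriptor chain, process the request, and post the
 * completion to the used ring.
 */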
static void
process_requestq_split(struct vhost_blk_ctrlr *ctrlr, uint32_t q_idx)
{
	int ret;
	int req_idx;
	uint16_t last_idx;
	uint64_t chunk_len;
	struct vhost_blk_queue *blk_vq;
	struct rte_vhost_vring *vq;
	struct vhost_blk_task *task;

	blk_vq = &ctrlr->bdev->queues[q_idx];
	vq = &blk_vq->vq;

	task = rte_zmalloc(NULL, sizeof(*task), 0);
	assert(task != NULL);
	task->vq = vq;
	task->bdev = ctrlr->bdev;

	while (vq->avail->idx != blk_vq->last_avail_idx) {
		last_idx = blk_vq->last_avail_idx & (vq->size - 1);
		req_idx = vq->avail->ring[last_idx];
		task->req_idx = req_idx;
		task->desc_split = &task->vq->desc[task->req_idx];
		task->iovs_cnt = 0;
		task->data_len = 0;
		task->req = NULL;
		task->status = NULL;

		rte_vhost_set_inflight_desc_split(ctrlr->bdev->vid, q_idx,
						  task->req_idx);

		/* does not support indirect descriptors */
		assert((task->desc_split->flags & VRING_DESC_F_INDIRECT) == 0);

		chunk_len = task->desc_split->len;
		task->req = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
				task->desc_split->addr, &chunk_len);
		if (!task->req || chunk_len != task->desc_split->len) {
			fprintf(stderr, "failed to translate desc address.\n");
			rte_free(task);
			return;
		}

		task->desc_split = descriptor_get_next_split(task->vq->desc,
							     task->desc_split);
		if (!descriptor_has_next_split(task->desc_split)) {
			task->dxfer_dir = BLK_DIR_NONE;
			chunk_len = task->desc_split->len;
			task->status = (void *)(uintptr_t)
				       gpa_to_vva(task->bdev->vid,
						  task->desc_split->addr,
						  &chunk_len);
			if (!task->status ||
			    chunk_len != task->desc_split->len) {
				fprintf(stderr,
					"failed to translate desc address.\n");
				rte_free(task);
				return;
			}
		} else {
			task->readtype =
				descriptor_is_wr_split(task->desc_split);
			vhost_process_payload_chain_split(task);
		}
		blk_vq->last_avail_idx++;

		ret = vhost_bdev_process_blk_commands(ctrlr->bdev, task);
		if (ret) {
			/* invalid response */
			*task->status = VIRTIO_BLK_S_IOERR;
		} else {
			*task->status = VIRTIO_BLK_S_OK;
		}

		submit_completion_split(task, ctrlr->bdev->vid, q_idx);
	}

	rte_free(task);
}

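/* Worker thread: first resubmit any inflight requests recovered after
 * a reconnection, then poll every queue until asked to stop.
 */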
static void *
ctrlr_worker(void *arg)
{
	struct vhost_blk_ctrlr *ctrlr = (struct vhost_blk_ctrlr *)arg;
	struct vhost_blk_queue *blk_vq;
	struct rte_vhost_ring_inflight *inflight_vq;
	cpu_set_t cpuset;
	pthread_t thread;
	int i;

	fprintf(stdout, "Ctrlr Worker Thread start\n");

	if (ctrlr == NULL || ctrlr->bdev == NULL) {
		fprintf(stderr,
			"%s: Error, invalid argument passed to worker thread\n",
			__func__);
		exit(0);
	}

	thread = pthread_self();
	CPU_ZERO(&cpuset);
	CPU_SET(0, &cpuset);
	pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuset);

	for (i = 0; i < NUM_OF_BLK_QUEUES; i++) {
		blk_vq = &ctrlr->bdev->queues[i];
		inflight_vq = &blk_vq->inflight_vq;
		if (inflight_vq->resubmit_inflight != NULL &&
		    inflight_vq->resubmit_inflight->resubmit_num != 0) {
			if (ctrlr->packed_ring)
				submit_inflight_vq_packed(ctrlr, i);
			else
				submit_inflight_vq_split(ctrlr, i);
		}
	}

	while (!g_should_stop && ctrlr->bdev != NULL) {
		for (i = 0; i < NUM_OF_BLK_QUEUES; i++) {
			if (ctrlr->packed_ring)
				process_requestq_packed(ctrlr, i);
			else
				process_requestq_split(ctrlr, i);
		}
	}

	g_should_stop = 2;
	fprintf(stdout, "Ctrlr Worker Thread Exiting\n");

	return NULL;
}

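/* Device-ready callback: capture the negotiated features, the memory
 * table and each vring, recover ring indexes from the inflight log
 * when reconnecting, then start the polling worker.
 */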
static int
new_device(int vid)
{
	struct vhost_blk_ctrlr *ctrlr;
	struct vhost_blk_queue *blk_vq;
	struct rte_vhost_vring *vq;
	uint64_t features;
	pthread_t tid;
	int i, ret;

	ctrlr = vhost_blk_ctrlr_find(dev_pathname);
	if (!ctrlr) {
		fprintf(stderr, "Controller is not ready\n");
		return -1;
	}

	ctrlr->bdev->vid = vid;
	ret = rte_vhost_get_negotiated_features(vid, &features);
	if (ret) {
		fprintf(stderr, "failed to get the negotiated features\n");
		return -1;
	}
	ctrlr->packed_ring = !!(features & (1ULL << VIRTIO_F_RING_PACKED));

	ret = rte_vhost_get_mem_table(vid, &ctrlr->mem);
	if (ret)
		fprintf(stderr, "Get Controller memory region failed\n");
	assert(ctrlr->mem != NULL);

	/* Disable Notifications and init last idx */
	for (i = 0; i < NUM_OF_BLK_QUEUES; i++) {
		blk_vq = &ctrlr->bdev->queues[i];
		vq = &blk_vq->vq;

		ret = rte_vhost_get_vhost_vring(ctrlr->bdev->vid, i, vq);
		assert(ret == 0);

		ret = rte_vhost_get_vring_base(ctrlr->bdev->vid, i,
					       &blk_vq->last_avail_idx,
					       &blk_vq->last_used_idx);
		assert(ret == 0);

		ret = rte_vhost_get_vhost_ring_inflight(ctrlr->bdev->vid, i,
							&blk_vq->inflight_vq);
		assert(ret == 0);

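		/* For packed rings the recovered indexes carry the ring
		 * wrap counter in bit 15; split the two apart below.
		 */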
		if (ctrlr->packed_ring) {
			/* for the reconnection */
			ret = rte_vhost_get_vring_base_from_inflight(
				ctrlr->bdev->vid, i,
				&blk_vq->last_avail_idx,
				&blk_vq->last_used_idx);
			assert(ret == 0);

			blk_vq->avail_wrap_counter = blk_vq->last_avail_idx &
				(1 << 15);
			blk_vq->last_avail_idx = blk_vq->last_avail_idx &
				0x7fff;
			blk_vq->used_wrap_counter = blk_vq->last_used_idx &
				(1 << 15);
			blk_vq->last_used_idx = blk_vq->last_used_idx &
				0x7fff;
		}

		rte_vhost_enable_guest_notification(vid, i, 0);
	}

	/* start polling vring */
	g_should_stop = 0;
	fprintf(stdout, "New Device %s, Device ID %d\n", dev_pathname, vid);
	if (pthread_create(&tid, NULL, &ctrlr_worker, ctrlr) < 0) {
		fprintf(stderr, "Worker thread start failed\n");
		return -1;
	}

	/* device has been started */
	return 0;
}

static void
destroy_device(int vid)
{
	char path[PATH_MAX];
	struct vhost_blk_ctrlr *ctrlr;
	struct vhost_blk_queue *blk_vq;
	int i, ret;

	ret = rte_vhost_get_ifname(vid, path, PATH_MAX);
	if (ret) {
		fprintf(stderr, "Destroy Ctrlr Failed\n");
		return;
	}

	fprintf(stdout, "Destroy %s Device ID %d\n", path, vid);
	ctrlr = vhost_blk_ctrlr_find(path);
	if (!ctrlr) {
		fprintf(stderr, "Destroy Ctrlr Failed\n");
		return;
	}

	g_should_stop = 1;
	while (g_should_stop != 2)
		;

	for (i = 0; i < NUM_OF_BLK_QUEUES; i++) {
		blk_vq = &ctrlr->bdev->queues[i];
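		/* Stash the wrap counter back into bit 15 of each index so
		 * that it survives in the saved vring base across the
		 * disconnect.
		 */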
		if (ctrlr->packed_ring) {
			blk_vq->last_avail_idx |= (blk_vq->avail_wrap_counter <<
				15);
			blk_vq->last_used_idx |= (blk_vq->used_wrap_counter <<
				15);
		}

		rte_vhost_set_vring_base(ctrlr->bdev->vid, i,
					 blk_vq->last_avail_idx,
					 blk_vq->last_used_idx);
	}

	g_should_stop = -1;
}

static int
new_connection(int vid)
{
	/* extend the proper features for block device */
	vhost_session_install_rte_compat_hooks(vid);

	return 0;
}

struct vhost_device_ops vhost_blk_device_ops = {
	.new_device = new_device,
	.destroy_device = destroy_device,
	.new_connection = new_connection,
};

static struct vhost_block_dev *
vhost_blk_bdev_construct(const char *bdev_name,
	const char *bdev_serial, uint32_t blk_size, uint64_t blk_cnt,
	bool wce_enable)
{
	struct vhost_block_dev *bdev;

	bdev = rte_zmalloc(NULL, sizeof(*bdev), RTE_CACHE_LINE_SIZE);
	if (!bdev)
		return NULL;

	strncpy(bdev->name, bdev_name, sizeof(bdev->name));
	strncpy(bdev->product_name, bdev_serial, sizeof(bdev->product_name));
	bdev->blocklen = blk_size;
	bdev->blockcnt = blk_cnt;
	bdev->write_cache = wce_enable;

	fprintf(stdout, "blocklen=%d, blockcnt=%"PRIx64"\n", bdev->blocklen,
		bdev->blockcnt);

	/* use memory as disk storage space */
	bdev->data = rte_zmalloc(NULL, blk_cnt * blk_size, 0);
	if (!bdev->data) {
		fprintf(stderr, "not enough reserved huge memory for disk\n");
		rte_free(bdev);
		return NULL;
	}

	return bdev;
}

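/* Register the vhost-user socket in the current working directory and
 * back the controller with a RAM disk.
 */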
static struct vhost_blk_ctrlr *
vhost_blk_ctrlr_construct(const char *ctrlr_name)
{
	int ret;
	struct vhost_blk_ctrlr *ctrlr;
	char *path;
	char cwd[PATH_MAX];

	/* always use current directory */
	path = getcwd(cwd, PATH_MAX);
	if (!path) {
		fprintf(stderr, "Cannot get current working directory\n");
		return NULL;
	}
	snprintf(dev_pathname, sizeof(dev_pathname), "%s/%s", path, ctrlr_name);

	unlink(dev_pathname);

	if (rte_vhost_driver_register(dev_pathname, 0) != 0) {
		fprintf(stderr, "socket %s already exists\n", dev_pathname);
		return NULL;
	}

	ret = rte_vhost_driver_set_features(dev_pathname, VHOST_BLK_FEATURES);
	if (ret != 0) {
		fprintf(stderr, "Set vhost driver features failed\n");
		rte_vhost_driver_unregister(dev_pathname);
		return NULL;
	}

	/* set proper features */
	vhost_dev_install_rte_compat_hooks(dev_pathname);

	ctrlr = rte_zmalloc(NULL, sizeof(*ctrlr), RTE_CACHE_LINE_SIZE);
	if (!ctrlr) {
		rte_vhost_driver_unregister(dev_pathname);
		return NULL;
	}

	/* hardcoded block device information with 128MiB */
	ctrlr->bdev = vhost_blk_bdev_construct("malloc0", "vhost_blk_malloc0",
					       4096, 32768, 0);
	if (!ctrlr->bdev) {
		rte_vhost_driver_unregister(dev_pathname);
		rte_free(ctrlr);
		return NULL;
	}

	rte_vhost_driver_callback_register(dev_pathname,
					   &vhost_blk_device_ops);

	return ctrlr;
}

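/* SIGINT handler: remove the socket file, stop the worker if it is
 * running, free the controller, and unregister the vhost driver.
 */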
static void
signal_handler(__rte_unused int signum)
{
	struct vhost_blk_ctrlr *ctrlr;

	unlink(dev_pathname);

	if (g_should_stop != -1) {
		g_should_stop = 1;
		while (g_should_stop != 2)
			;
	}

	ctrlr = vhost_blk_ctrlr_find(dev_pathname);
	if (ctrlr != NULL) {
		if (ctrlr->bdev != NULL) {
			rte_free(ctrlr->bdev->data);
			rte_free(ctrlr->bdev);
		}
		rte_free(ctrlr);
	}

	rte_vhost_driver_unregister(dev_pathname);
	sem_post(&exit_sem);
}

int main(int argc, char *argv[])
{
	int ret;

	signal(SIGINT, signal_handler);

	/* init EAL */
	ret = rte_eal_init(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");

	g_vhost_ctrlr = vhost_blk_ctrlr_construct("vhost.socket");
	if (g_vhost_ctrlr == NULL) {
		fprintf(stderr, "Construct vhost blk controller failed\n");
		return 0;
	}

	if (sem_init(&exit_sem, 0, 0) < 0) {
		fprintf(stderr, "Error init exit_sem\n");
		return -1;
	}

	rte_vhost_driver_start(dev_pathname);

	/* block until the signal handler has torn everything down,
	 * then exit the application
	 */
	sem_wait(&exit_sem);

	return 0;
}