/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2019 Intel Corporation
 */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdbool.h>
#include <string.h>
#include <signal.h>
#include <unistd.h>
#include <assert.h>
#include <inttypes.h>
#include <limits.h>
#include <pthread.h>
#include <semaphore.h>
#include <linux/virtio_blk.h>
#include <linux/virtio_ring.h>

#include <rte_atomic.h>
#include <rte_cycles.h>
#include <rte_malloc.h>
#include <rte_vhost.h>

#include "vhost_blk.h"
#define VIRTQ_DESC_F_NEXT	1
#define VIRTQ_DESC_F_AVAIL	(1 << 7)
#define VIRTQ_DESC_F_USED	(1 << 15)
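/* Packed-ring descriptor flags (virtio 1.1, "Packed Virtqueues"): AVAIL
 * lives in bit 7 and USED in bit 15 of each descriptor's flags. The
 * driver sets AVAIL to match its wrap counter when posting a descriptor;
 * the device completes it by setting both AVAIL and USED to its own wrap
 * counter value, which descriptor_is_available() below relies on.
 */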
#define VHOST_BLK_FEATURES ((1ULL << VIRTIO_F_RING_PACKED) | \
			    (1ULL << VIRTIO_F_VERSION_1) | \
			    (1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | \
			    (1ULL << VHOST_USER_F_PROTOCOL_FEATURES))
/* Path of the vhost-user socket file; built at startup from the current
 * working directory and the controller name.
 */
static char dev_pathname[PATH_MAX] = "";
static sem_t exit_sem;
static int g_should_stop = -1;
struct vhost_blk_ctrlr *
vhost_blk_ctrlr_find(const char *ctrlr_name)
{
	if (ctrlr_name == NULL)
		return NULL;

	/* currently we only support one socket file */
	return g_vhost_ctrlr;
}
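/* Translate a guest physical address into a host virtual address via the
 * controller's memory table. rte_vhost_va_from_guest_pa() shrinks *len if
 * the requested range is not contiguous in the mapping, which is why all
 * callers compare the returned length against desc->len afterwards.
 */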
static uint64_t gpa_to_vva(int vid, uint64_t gpa, uint64_t *len)
{
	char path[PATH_MAX];
	struct vhost_blk_ctrlr *ctrlr;
	int ret;

	ret = rte_vhost_get_ifname(vid, path, PATH_MAX);
	if (ret) {
		fprintf(stderr, "Cannot get socket name\n");
		assert(ret == 0);
	}

	ctrlr = vhost_blk_ctrlr_find(path);
	if (!ctrlr)
		fprintf(stderr, "Controller is not ready\n");
	assert(ctrlr != NULL);
	assert(ctrlr->mem != NULL);

	return rte_vhost_va_from_guest_pa(ctrlr->mem, gpa, len);
}
static struct vring_packed_desc *
descriptor_get_next_packed(struct rte_vhost_vring *vq,
			   uint16_t *idx)
{
	if (vq->desc_packed[*idx % vq->size].flags & VIRTQ_DESC_F_NEXT) {
		*idx += 1;
		return &vq->desc_packed[*idx % vq->size];
	}

	return NULL;
}
static bool
descriptor_has_next_packed(struct vring_packed_desc *cur_desc)
{
	return !!(cur_desc->flags & VRING_DESC_F_NEXT);
}

static bool
descriptor_is_wr_packed(struct vring_packed_desc *cur_desc)
{
	return !!(cur_desc->flags & VRING_DESC_F_WRITE);
}
static struct rte_vhost_inflight_desc_packed *
inflight_desc_get_next(struct rte_vhost_inflight_info_packed *inflight_packed,
		       struct rte_vhost_inflight_desc_packed *cur_desc)
{
	if (cur_desc->flags & VIRTQ_DESC_F_NEXT)
		return &inflight_packed->desc[cur_desc->next];

	return NULL;
}
static bool
inflight_desc_has_next(struct rte_vhost_inflight_desc_packed *cur_desc)
{
	return !!(cur_desc->flags & VRING_DESC_F_NEXT);
}

static bool
inflight_desc_is_wr(struct rte_vhost_inflight_desc_packed *cur_desc)
{
	return !!(cur_desc->flags & VRING_DESC_F_WRITE);
}
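/* A virtio-blk request is a descriptor chain: a 16-byte request header
 * (type/ioprio/sector), zero or more payload descriptors, and a final
 * device-writable status byte. The helpers above walk such chains both in
 * the live ring and in the inflight log; VRING_DESC_F_WRITE marks buffers
 * the device may write into (read payloads and the status byte).
 */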
static void
inflight_process_payload_chain_packed(struct inflight_blk_task *task)
{
	void *data;
	uint64_t chunck_len;
	struct vhost_blk_task *blk_task;
	struct rte_vhost_inflight_desc_packed *desc;

	blk_task = &task->blk_task;
	blk_task->iovs_cnt = 0;

	do {
		desc = task->inflight_desc;
		chunck_len = desc->len;
		data = (void *)(uintptr_t)gpa_to_vva(blk_task->bdev->vid,
						     desc->addr,
						     &chunck_len);
		if (!data || chunck_len != desc->len) {
			fprintf(stderr, "failed to translate desc address.\n");
			return;
		}

		blk_task->iovs[blk_task->iovs_cnt].iov_base = data;
		blk_task->iovs[blk_task->iovs_cnt].iov_len = desc->len;
		blk_task->data_len += desc->len;
		blk_task->iovs_cnt++;
		task->inflight_desc = inflight_desc_get_next(
					task->inflight_packed, desc);
	} while (inflight_desc_has_next(task->inflight_desc));

	/* the last descriptor in the chain carries the 1-byte status */
	chunck_len = task->inflight_desc->len;
	blk_task->status = (void *)(uintptr_t)gpa_to_vva(
		blk_task->bdev->vid, task->inflight_desc->addr, &chunck_len);
	if (!blk_task->status || chunck_len != task->inflight_desc->len)
		fprintf(stderr, "failed to translate desc address.\n");
}
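/* Completion on a packed ring: write the chain's buffer id into the used
 * slot, flip AVAIL/USED to match the device's used wrap counter, and
 * advance the used index by the number of descriptors the request
 * consumed (payload iovs plus header and status, hence "+ 2"). Crossing
 * vq->size toggles the wrap counter.
 */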
static void
inflight_submit_completion_packed(struct inflight_blk_task *task,
				  uint32_t q_idx, uint16_t *used_id,
				  bool *used_wrap_counter)
{
	struct vhost_blk_ctrlr *ctrlr;
	struct rte_vhost_vring *vq;
	struct vring_packed_desc *desc;
	int ret;

	ctrlr = vhost_blk_ctrlr_find(dev_pathname);
	vq = task->blk_task.vq;

	ret = rte_vhost_set_last_inflight_io_packed(ctrlr->bdev->vid, q_idx,
						    task->blk_task.head_idx);
	if (ret != 0)
		fprintf(stderr, "failed to set last inflight io\n");

	desc = &vq->desc_packed[*used_id];
	desc->id = task->blk_task.buffer_id;
	rte_smp_mb();
	if (*used_wrap_counter)
		desc->flags |= VIRTQ_DESC_F_AVAIL | VIRTQ_DESC_F_USED;
	else
		desc->flags &= ~(VIRTQ_DESC_F_AVAIL | VIRTQ_DESC_F_USED);
	rte_smp_mb();

	/* header and status descriptors account for the "+ 2" */
	*used_id += task->blk_task.iovs_cnt + 2;
	if (*used_id >= vq->size) {
		*used_id -= vq->size;
		*used_wrap_counter = !(*used_wrap_counter);
	}

	ret = rte_vhost_clr_inflight_desc_packed(ctrlr->bdev->vid, q_idx,
						 task->blk_task.head_idx);
	if (ret != 0)
		fprintf(stderr, "failed to clear inflight io\n");

	/* Send an interrupt back to the guest VM so that it knows
	 * a completion is ready to be processed.
	 */
	rte_vhost_vring_call(task->blk_task.bdev->vid, q_idx);
}
static void
submit_completion_packed(struct vhost_blk_task *task, uint32_t q_idx,
			 uint16_t *used_id, bool *used_wrap_counter)
{
	struct vhost_blk_ctrlr *ctrlr;
	struct rte_vhost_vring *vq;
	struct vring_packed_desc *desc;
	int ret;

	ctrlr = vhost_blk_ctrlr_find(dev_pathname);
	vq = task->vq;

	ret = rte_vhost_set_last_inflight_io_packed(ctrlr->bdev->vid, q_idx,
						    task->head_idx);
	if (ret != 0)
		fprintf(stderr, "failed to set last inflight io\n");

	desc = &vq->desc_packed[*used_id];
	desc->id = task->buffer_id;
	rte_smp_mb();
	if (*used_wrap_counter)
		desc->flags |= VIRTQ_DESC_F_AVAIL | VIRTQ_DESC_F_USED;
	else
		desc->flags &= ~(VIRTQ_DESC_F_AVAIL | VIRTQ_DESC_F_USED);
	rte_smp_mb();

	*used_id += task->iovs_cnt + 2;
	if (*used_id >= vq->size) {
		*used_id -= vq->size;
		*used_wrap_counter = !(*used_wrap_counter);
	}

	ret = rte_vhost_clr_inflight_desc_packed(ctrlr->bdev->vid, q_idx,
						 task->head_idx);
	if (ret != 0)
		fprintf(stderr, "failed to clear inflight io\n");

	/* Send an interrupt back to the guest VM so that it knows
	 * a completion is ready to be processed.
	 */
	rte_vhost_vring_call(task->bdev->vid, q_idx);
}
static void
vhost_process_payload_chain_packed(struct vhost_blk_task *task,
				   uint16_t *idx)
{
	void *data;
	uint64_t chunck_len;

	task->iovs_cnt = 0;

	do {
		chunck_len = task->desc_packed->len;
		data = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
						     task->desc_packed->addr,
						     &chunck_len);
		if (!data || chunck_len != task->desc_packed->len) {
			fprintf(stderr, "failed to translate desc address.\n");
			return;
		}

		task->iovs[task->iovs_cnt].iov_base = data;
		task->iovs[task->iovs_cnt].iov_len = task->desc_packed->len;
		task->data_len += task->desc_packed->len;
		task->iovs_cnt++;
		task->desc_packed = descriptor_get_next_packed(task->vq, idx);
	} while (descriptor_has_next_packed(task->desc_packed));

	task->last_idx = *idx % task->vq->size;
	chunck_len = task->desc_packed->len;
	task->status = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
						     task->desc_packed->addr,
						     &chunck_len);
	if (!task->status || chunck_len != task->desc_packed->len)
		fprintf(stderr, "failed to translate desc address.\n");
}
static bool
descriptor_is_available(struct rte_vhost_vring *vring, uint16_t idx,
			bool avail_wrap_counter)
{
	uint16_t flags = vring->desc_packed[idx].flags;

	return ((!!(flags & VIRTQ_DESC_F_AVAIL) == avail_wrap_counter) &&
		(!!(flags & VIRTQ_DESC_F_USED) != avail_wrap_counter));
}
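/* Example: while avail_wrap_counter is true a descriptor is ready when
 * AVAIL is set and USED is clear; after the ring wraps, the counter is
 * false and the same slot is ready when AVAIL is clear and USED is set.
 * This paired test lets driver and device share a single descriptor
 * array without separate avail/used rings.
 */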
static void
process_requestq_packed(struct vhost_blk_ctrlr *ctrlr, uint32_t q_idx)
{
	bool avail_wrap_counter, used_wrap_counter;
	uint16_t avail_idx, used_idx;
	uint64_t chunck_len;
	int ret;
	struct vhost_blk_queue *blk_vq;
	struct rte_vhost_vring *vq;
	struct vhost_blk_task *task;

	blk_vq = &ctrlr->bdev->queues[q_idx];
	vq = &blk_vq->vq;

	avail_idx = blk_vq->last_avail_idx;
	avail_wrap_counter = blk_vq->avail_wrap_counter;
	used_idx = blk_vq->last_used_idx;
	used_wrap_counter = blk_vq->used_wrap_counter;

	task = rte_zmalloc(NULL, sizeof(*task), 0);
	assert(task != NULL);
	task->vq = vq;
	task->bdev = ctrlr->bdev;

	while (descriptor_is_available(vq, avail_idx, avail_wrap_counter)) {
		task->head_idx = avail_idx;
		task->desc_packed = &task->vq->desc_packed[task->head_idx];
		task->iovs_cnt = 0;
		task->data_len = 0;
		task->req = NULL;
		task->status = NULL;

		/* does not support indirect descriptors */
		assert((task->desc_packed->flags & VRING_DESC_F_INDIRECT) == 0);

		chunck_len = task->desc_packed->len;
		task->req = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
			task->desc_packed->addr, &chunck_len);
		if (!task->req || chunck_len != task->desc_packed->len) {
			fprintf(stderr, "failed to translate desc address.\n");
			rte_free(task);
			return;
		}

		task->desc_packed = descriptor_get_next_packed(task->vq,
							       &avail_idx);
		assert(task->desc_packed != NULL);
		if (!descriptor_has_next_packed(task->desc_packed)) {
			/* no payload: header is followed directly by status */
			task->dxfer_dir = BLK_DIR_NONE;
			task->last_idx = avail_idx % vq->size;
			chunck_len = task->desc_packed->len;
			task->status = (void *)(uintptr_t)
				       gpa_to_vva(task->bdev->vid,
						  task->desc_packed->addr,
						  &chunck_len);
			if (!task->status ||
			    chunck_len != task->desc_packed->len) {
				fprintf(stderr,
					"failed to translate desc address.\n");
				rte_free(task);
				return;
			}
		} else {
			task->readtype = descriptor_is_wr_packed(
						task->desc_packed);
			vhost_process_payload_chain_packed(task, &avail_idx);
		}
		task->buffer_id = vq->desc_packed[task->last_idx].id;
		rte_vhost_set_inflight_desc_packed(ctrlr->bdev->vid, q_idx,
						   task->head_idx,
						   task->last_idx,
						   &task->inflight_idx);

		if (++avail_idx >= vq->size) {
			avail_idx -= vq->size;
			avail_wrap_counter = !avail_wrap_counter;
		}
		blk_vq->last_avail_idx = avail_idx;
		blk_vq->avail_wrap_counter = avail_wrap_counter;

		ret = vhost_bdev_process_blk_commands(ctrlr->bdev, task);
		if (ret) {
			/* invalid response */
			*task->status = VIRTIO_BLK_S_IOERR;
		} else {
			*task->status = VIRTIO_BLK_S_OK;
		}

		submit_completion_packed(task, q_idx, &used_idx,
					 &used_wrap_counter);
		blk_vq->last_used_idx = used_idx;
		blk_vq->used_wrap_counter = used_wrap_counter;
	}

	rte_free(task);
}
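/* Inflight resubmission: the inflight region, shared with the front-end
 * through the vhost-user get/set inflight fd messages, logs descriptors
 * that were fetched but not completed. After the backend restarts and
 * reconnects, the loop below replays those requests before any new ones
 * are polled, so no guest I/O is lost across the crash.
 */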
static void
submit_inflight_vq_packed(struct vhost_blk_ctrlr *ctrlr,
			  uint16_t q_idx)
{
	bool used_wrap_counter;
	int req_idx, ret;
	uint16_t used_idx;
	uint64_t chunck_len;
	struct vhost_blk_queue *blk_vq;
	struct rte_vhost_ring_inflight *inflight_vq;
	struct rte_vhost_resubmit_info *resubmit_info;
	struct rte_vhost_vring *vq;
	struct inflight_blk_task *task;
	struct vhost_blk_task *blk_task;
	struct rte_vhost_inflight_info_packed *inflight_info;

	blk_vq = &ctrlr->bdev->queues[q_idx];
	vq = &blk_vq->vq;
	inflight_vq = &blk_vq->inflight_vq;
	resubmit_info = inflight_vq->resubmit_inflight;
	inflight_info = inflight_vq->inflight_packed;
	used_idx = blk_vq->last_used_idx;
	used_wrap_counter = blk_vq->used_wrap_counter;

	task = rte_malloc(NULL, sizeof(*task), 0);
	if (!task) {
		fprintf(stderr, "failed to allocate memory\n");
		return;
	}
	blk_task = &task->blk_task;
	blk_task->vq = vq;
	blk_task->bdev = ctrlr->bdev;
	task->inflight_packed = inflight_vq->inflight_packed;

	while (resubmit_info->resubmit_num-- > 0) {
		req_idx = resubmit_info->resubmit_num;
		blk_task->head_idx =
			resubmit_info->resubmit_list[req_idx].index;
		task->inflight_desc =
			&inflight_info->desc[blk_task->head_idx];
		task->blk_task.iovs_cnt = 0;
		task->blk_task.data_len = 0;
		task->blk_task.req = NULL;
		task->blk_task.status = NULL;

		/* update the avail idx too,
		 * as its initial value equals the used idx
		 */
		blk_vq->last_avail_idx += task->inflight_desc->num;
		if (blk_vq->last_avail_idx >= vq->size) {
			blk_vq->last_avail_idx -= vq->size;
			blk_vq->avail_wrap_counter =
				!blk_vq->avail_wrap_counter;
		}

		/* does not support indirect descriptors */
		assert(task->inflight_desc != NULL);
		assert((task->inflight_desc->flags &
			VRING_DESC_F_INDIRECT) == 0);

		chunck_len = task->inflight_desc->len;
		blk_task->req = (void *)(uintptr_t)
				gpa_to_vva(blk_task->bdev->vid,
					   task->inflight_desc->addr,
					   &chunck_len);
		if (!blk_task->req ||
		    chunck_len != task->inflight_desc->len) {
			fprintf(stderr, "failed to translate desc address.\n");
			rte_free(task);
			return;
		}

		task->inflight_desc = inflight_desc_get_next(
				task->inflight_packed, task->inflight_desc);
		assert(task->inflight_desc != NULL);
		if (!inflight_desc_has_next(task->inflight_desc)) {
			blk_task->dxfer_dir = BLK_DIR_NONE;
			chunck_len = task->inflight_desc->len;
			blk_task->status = (void *)(uintptr_t)
				gpa_to_vva(blk_task->bdev->vid,
					   task->inflight_desc->addr,
					   &chunck_len);
			if (!blk_task->status ||
			    chunck_len != task->inflight_desc->len) {
				fprintf(stderr,
					"failed to translate desc address.\n");
				rte_free(task);
				return;
			}
		} else {
			blk_task->readtype =
				inflight_desc_is_wr(task->inflight_desc);
			inflight_process_payload_chain_packed(task);
		}
		blk_task->buffer_id = task->inflight_desc->id;

		ret = vhost_bdev_process_blk_commands(ctrlr->bdev, blk_task);
		if (ret)
			/* invalid response */
			*blk_task->status = VIRTIO_BLK_S_IOERR;
		else
			*blk_task->status = VIRTIO_BLK_S_OK;

		inflight_submit_completion_packed(task, q_idx, &used_idx,
						  &used_wrap_counter);

		blk_vq->last_used_idx = used_idx;
		blk_vq->used_wrap_counter = used_wrap_counter;
	}

	rte_free(task);
}
static struct vring_desc *
descriptor_get_next_split(struct vring_desc *vq_desc,
			  struct vring_desc *cur_desc)
{
	return &vq_desc[cur_desc->next];
}

static bool
descriptor_has_next_split(struct vring_desc *cur_desc)
{
	return !!(cur_desc->flags & VRING_DESC_F_NEXT);
}

static bool
descriptor_is_wr_split(struct vring_desc *cur_desc)
{
	return !!(cur_desc->flags & VRING_DESC_F_WRITE);
}
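/* Split-ring chains link through an explicit "next" index in each
 * descriptor, so walking a chain is a table lookup rather than the
 * modular index arithmetic the packed ring needs.
 */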
static void
vhost_process_payload_chain_split(struct vhost_blk_task *task)
{
	void *data;
	uint64_t chunck_len;

	task->iovs_cnt = 0;

	do {
		chunck_len = task->desc_split->len;
		data = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
						     task->desc_split->addr,
						     &chunck_len);
		if (!data || chunck_len != task->desc_split->len) {
			fprintf(stderr, "failed to translate desc address.\n");
			return;
		}

		task->iovs[task->iovs_cnt].iov_base = data;
		task->iovs[task->iovs_cnt].iov_len = task->desc_split->len;
		task->data_len += task->desc_split->len;
		task->iovs_cnt++;
		task->desc_split =
			descriptor_get_next_split(task->vq->desc,
						  task->desc_split);
	} while (descriptor_has_next_split(task->desc_split));

	chunck_len = task->desc_split->len;
	task->status = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
						     task->desc_split->addr,
						     &chunck_len);
	if (!task->status || chunck_len != task->desc_split->len)
		fprintf(stderr, "failed to translate desc address.\n");
}
static void
submit_completion_split(struct vhost_blk_task *task, uint32_t vid,
			uint32_t q_idx)
{
	struct rte_vhost_vring *vq;
	struct vring_used *used;

	vq = task->vq;
	used = vq->used;

	rte_vhost_set_last_inflight_io_split(vid, q_idx, task->req_idx);

	/* Fill out the next entry in the "used" ring: id is the index of
	 * the descriptor chain that held the blk request, len the total
	 * amount of data transferred for it. We must report the correct
	 * len for variable-length requests, where we may return less data
	 * than the guest VM allocated.
	 */
	used->ring[used->idx & (vq->size - 1)].id = task->req_idx;
	used->ring[used->idx & (vq->size - 1)].len = task->data_len;
	rte_smp_mb();
	used->idx++;
	rte_smp_mb();

	rte_vhost_clr_inflight_desc_split(vid, q_idx, used->idx,
					  task->req_idx);

	/* Send an interrupt back to the guest VM so that it knows
	 * a completion is ready to be processed.
	 */
	rte_vhost_vring_call(task->bdev->vid, q_idx);
}
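/* Note: masking with (vq->size - 1) above relies on split virtqueue
 * sizes being powers of two, as the virtio spec requires, and the
 * barriers order the used-ring entry writes before the idx publication
 * that makes them visible to the guest.
 */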
static void
submit_inflight_vq_split(struct vhost_blk_ctrlr *ctrlr,
			 uint32_t q_idx)
{
	struct vhost_blk_queue *blk_vq;
	struct rte_vhost_ring_inflight *inflight_vq;
	struct rte_vhost_resubmit_info *resubmit_inflight;
	struct rte_vhost_resubmit_desc *resubmit_list;
	struct vhost_blk_task *task;
	uint64_t chunck_len;
	int req_idx;
	int ret;

	blk_vq = &ctrlr->bdev->queues[q_idx];
	inflight_vq = &blk_vq->inflight_vq;
	resubmit_inflight = inflight_vq->resubmit_inflight;
	resubmit_list = resubmit_inflight->resubmit_list;

	task = rte_zmalloc(NULL, sizeof(*task), 0);
	assert(task != NULL);

	task->bdev = ctrlr->bdev;
	task->vq = &blk_vq->vq;

	while (resubmit_inflight->resubmit_num-- > 0) {
		req_idx = resubmit_list[resubmit_inflight->resubmit_num].index;
		task->req_idx = req_idx;
		task->desc_split = &task->vq->desc[task->req_idx];
		task->iovs_cnt = 0;
		task->data_len = 0;
		task->req = NULL;
		task->status = NULL;

		/* does not support indirect descriptors */
		assert(task->desc_split != NULL);
		assert((task->desc_split->flags & VRING_DESC_F_INDIRECT) == 0);

		chunck_len = task->desc_split->len;
		task->req = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
				task->desc_split->addr, &chunck_len);
		if (!task->req || chunck_len != task->desc_split->len) {
			fprintf(stderr, "failed to translate desc address.\n");
			rte_free(task);
			return;
		}

		task->desc_split = descriptor_get_next_split(task->vq->desc,
							     task->desc_split);
		if (!descriptor_has_next_split(task->desc_split)) {
			task->dxfer_dir = BLK_DIR_NONE;
			chunck_len = task->desc_split->len;
			task->status = (void *)(uintptr_t)
				       gpa_to_vva(task->bdev->vid,
						  task->desc_split->addr,
						  &chunck_len);
			if (!task->status ||
			    chunck_len != task->desc_split->len) {
				fprintf(stderr,
					"failed to translate desc address.\n");
				rte_free(task);
				return;
			}
		} else {
			task->readtype =
				descriptor_is_wr_split(task->desc_split);
			vhost_process_payload_chain_split(task);
		}

		ret = vhost_bdev_process_blk_commands(ctrlr->bdev, task);
		if (ret) {
			/* invalid response */
			*task->status = VIRTIO_BLK_S_IOERR;
		} else {
			*task->status = VIRTIO_BLK_S_OK;
		}

		submit_completion_split(task, ctrlr->bdev->vid, q_idx);
	}

	rte_free(task);
}
static void
process_requestq_split(struct vhost_blk_ctrlr *ctrlr, uint32_t q_idx)
{
	uint64_t chunck_len;
	uint16_t last_idx;
	int req_idx;
	int ret;
	struct vhost_blk_queue *blk_vq;
	struct rte_vhost_vring *vq;
	struct vhost_blk_task *task;

	blk_vq = &ctrlr->bdev->queues[q_idx];
	vq = &blk_vq->vq;

	task = rte_zmalloc(NULL, sizeof(*task), 0);
	assert(task != NULL);
	task->bdev = ctrlr->bdev;
	task->vq = vq;

	while (vq->avail->idx != blk_vq->last_avail_idx) {
		last_idx = blk_vq->last_avail_idx & (vq->size - 1);
		req_idx = vq->avail->ring[last_idx];
		task->req_idx = req_idx;
		task->desc_split = &task->vq->desc[task->req_idx];
		task->iovs_cnt = 0;
		task->data_len = 0;
		task->req = NULL;
		task->status = NULL;

		rte_vhost_set_inflight_desc_split(ctrlr->bdev->vid, q_idx,
						  task->req_idx);

		/* does not support indirect descriptors */
		assert((task->desc_split->flags & VRING_DESC_F_INDIRECT) == 0);

		chunck_len = task->desc_split->len;
		task->req = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
				task->desc_split->addr, &chunck_len);
		if (!task->req || chunck_len != task->desc_split->len) {
			fprintf(stderr, "failed to translate desc address.\n");
			rte_free(task);
			return;
		}

		task->desc_split = descriptor_get_next_split(task->vq->desc,
							     task->desc_split);
		if (!descriptor_has_next_split(task->desc_split)) {
			task->dxfer_dir = BLK_DIR_NONE;
			chunck_len = task->desc_split->len;
			task->status = (void *)(uintptr_t)
				       gpa_to_vva(task->bdev->vid,
						  task->desc_split->addr,
						  &chunck_len);
			if (!task->status ||
			    chunck_len != task->desc_split->len) {
				fprintf(stderr,
					"failed to translate desc address.\n");
				rte_free(task);
				return;
			}
		} else {
			task->readtype =
				descriptor_is_wr_split(task->desc_split);
			vhost_process_payload_chain_split(task);
		}
		blk_vq->last_avail_idx++;

		ret = vhost_bdev_process_blk_commands(ctrlr->bdev, task);
		if (ret) {
			/* invalid response */
			*task->status = VIRTIO_BLK_S_IOERR;
		} else {
			*task->status = VIRTIO_BLK_S_OK;
		}

		submit_completion_split(task, ctrlr->bdev->vid, q_idx);
	}

	rte_free(task);
}
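/* The worker thread below busy-polls every queue: guest notifications
 * are disabled in new_device(), so requests are picked up purely by
 * polling and completions are signalled with rte_vhost_vring_call().
 */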
static void *
ctrlr_worker(void *arg)
{
	struct vhost_blk_ctrlr *ctrlr = (struct vhost_blk_ctrlr *)arg;
	struct vhost_blk_queue *blk_vq;
	struct rte_vhost_ring_inflight *inflight_vq;
	cpu_set_t cpuset;
	pthread_t thread;
	int i;

	fprintf(stdout, "Ctrlr Worker Thread start\n");

	if (ctrlr == NULL || ctrlr->bdev == NULL) {
		fprintf(stderr,
			"%s: Error, invalid argument passed to worker thread\n",
			__func__);
		exit(EXIT_FAILURE);
	}

	/* pin the worker to core 0 */
	thread = pthread_self();
	CPU_ZERO(&cpuset);
	CPU_SET(0, &cpuset);
	pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuset);

	/* replay inflight requests left over from a previous session */
	for (i = 0; i < NUM_OF_BLK_QUEUES; i++) {
		blk_vq = &ctrlr->bdev->queues[i];
		inflight_vq = &blk_vq->inflight_vq;
		if (inflight_vq->resubmit_inflight != NULL &&
		    inflight_vq->resubmit_inflight->resubmit_num != 0) {
			if (ctrlr->packed_ring)
				submit_inflight_vq_packed(ctrlr, i);
			else
				submit_inflight_vq_split(ctrlr, i);
		}
	}

	while (!g_should_stop && ctrlr->bdev != NULL) {
		for (i = 0; i < NUM_OF_BLK_QUEUES; i++) {
			if (ctrlr->packed_ring)
				process_requestq_packed(ctrlr, i);
			else
				process_requestq_split(ctrlr, i);
		}
	}

	g_should_stop = 2;
	fprintf(stdout, "Ctrlr Worker Thread Exiting\n");
	sem_post(&exit_sem);
	return NULL;
}
static int
new_device(int vid)
{
	struct vhost_blk_ctrlr *ctrlr;
	struct vhost_blk_queue *blk_vq;
	struct rte_vhost_vring *vq;
	uint64_t features;
	pthread_t tid;
	int i, ret;

	ctrlr = vhost_blk_ctrlr_find(dev_pathname);
	if (!ctrlr) {
		fprintf(stderr, "Controller is not ready\n");
		return -1;
	}

	if (ctrlr->started)
		return 0;

	ctrlr->bdev->vid = vid;
	ret = rte_vhost_get_negotiated_features(vid, &features);
	if (ret) {
		fprintf(stderr, "failed to get the negotiated features\n");
		return -1;
	}
	ctrlr->packed_ring = !!(features & (1ULL << VIRTIO_F_RING_PACKED));

	ret = rte_vhost_get_mem_table(vid, &ctrlr->mem);
	if (ret)
		fprintf(stderr, "Get Controller memory region failed\n");
	assert(ctrlr->mem != NULL);

	/* Disable notifications and init the last indexes */
	for (i = 0; i < NUM_OF_BLK_QUEUES; i++) {
		blk_vq = &ctrlr->bdev->queues[i];
		vq = &blk_vq->vq;

		ret = rte_vhost_get_vhost_vring(ctrlr->bdev->vid, i, vq);
		assert(ret == 0);

		ret = rte_vhost_get_vring_base(ctrlr->bdev->vid, i,
					       &blk_vq->last_avail_idx,
					       &blk_vq->last_used_idx);
		assert(ret == 0);

		ret = rte_vhost_get_vhost_ring_inflight(ctrlr->bdev->vid, i,
							&blk_vq->inflight_vq);
		assert(ret == 0);

		if (ctrlr->packed_ring) {
			/* for the reconnection: the wrap counter is saved
			 * in bit 15 of the ring base
			 */
			ret = rte_vhost_get_vring_base_from_inflight(
				ctrlr->bdev->vid, i,
				&blk_vq->last_avail_idx,
				&blk_vq->last_used_idx);
			assert(ret == 0);

			blk_vq->avail_wrap_counter = blk_vq->last_avail_idx &
				(1 << 15);
			blk_vq->last_avail_idx = blk_vq->last_avail_idx &
				0x7fff;
			blk_vq->used_wrap_counter = blk_vq->last_used_idx &
				(1 << 15);
			blk_vq->last_used_idx = blk_vq->last_used_idx &
				0x7fff;
		}

		rte_vhost_enable_guest_notification(vid, i, 0);
	}

	/* start polling vring */
	g_should_stop = 0;
	fprintf(stdout, "New Device %s, Device ID %d\n", dev_pathname, vid);
	if (pthread_create(&tid, NULL, &ctrlr_worker, ctrlr) < 0) {
		fprintf(stderr, "Failed to start worker thread\n");
		return -1;
	}

	/* device has been started */
	ctrlr->started = 1;
	pthread_detach(tid);
	return 0;
}
static void
destroy_device(int vid)
{
	char path[PATH_MAX];
	struct vhost_blk_ctrlr *ctrlr;
	struct vhost_blk_queue *blk_vq;
	int i, ret;

	ret = rte_vhost_get_ifname(vid, path, PATH_MAX);
	if (ret) {
		fprintf(stderr, "Destroy Ctrlr Failed\n");
		return;
	}

	fprintf(stdout, "Destroy %s Device ID %d\n", path, vid);
	ctrlr = vhost_blk_ctrlr_find(path);
	if (!ctrlr) {
		fprintf(stderr, "Destroy Ctrlr Failed\n");
		return;
	}

	if (!ctrlr->started)
		return;

	/* stop the worker thread and wait for its acknowledgement */
	g_should_stop = 1;
	while (g_should_stop != 2)
		;

	for (i = 0; i < NUM_OF_BLK_QUEUES; i++) {
		blk_vq = &ctrlr->bdev->queues[i];
		if (ctrlr->packed_ring) {
			/* save the wrap counter in bit 15 of the base */
			blk_vq->last_avail_idx |=
				(blk_vq->avail_wrap_counter << 15);
			blk_vq->last_used_idx |=
				(blk_vq->used_wrap_counter << 15);
		}

		rte_vhost_set_vring_base(ctrlr->bdev->vid, i,
					 blk_vq->last_avail_idx,
					 blk_vq->last_used_idx);
	}

	ctrlr->started = 0;
	sem_wait(&exit_sem);
}
static int
new_connection(int vid)
{
	/* install the message hooks that extend the block-device features */
	vhost_session_install_rte_compat_hooks(vid);

	return 0;
}

struct vhost_device_ops vhost_blk_device_ops = {
	.new_device = new_device,
	.destroy_device = destroy_device,
	.new_connection = new_connection,
};
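/* Callback ordering: new_connection() fires when a front-end connects to
 * the socket, new_device() once features are negotiated and the rings
 * are set up, and destroy_device() on disconnect. The ring bases saved
 * in destroy_device(), together with the inflight log, make a later
 * reconnection transparent to the guest.
 */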
static struct vhost_block_dev *
vhost_blk_bdev_construct(const char *bdev_name,
	const char *bdev_serial, uint32_t blk_size, uint64_t blk_cnt,
	bool wce_enable)
{
	struct vhost_block_dev *bdev;

	bdev = rte_zmalloc(NULL, sizeof(*bdev), RTE_CACHE_LINE_SIZE);
	if (!bdev)
		return NULL;

	strncpy(bdev->name, bdev_name, sizeof(bdev->name));
	strncpy(bdev->product_name, bdev_serial, sizeof(bdev->product_name));
	bdev->blocklen = blk_size;
	bdev->blockcnt = blk_cnt;
	bdev->write_cache = wce_enable;

	fprintf(stdout, "blocklen=%"PRIu32", blockcnt=%"PRIx64"\n",
		bdev->blocklen, bdev->blockcnt);

	/* use memory as disk storage space */
	bdev->data = rte_zmalloc(NULL, blk_cnt * blk_size, 0);
	if (!bdev->data) {
		fprintf(stderr, "not enough reserved huge memory for disk\n");
		rte_free(bdev);
		return NULL;
	}

	return bdev;
}
static struct vhost_blk_ctrlr *
vhost_blk_ctrlr_construct(const char *ctrlr_name)
{
	int ret;
	struct vhost_blk_ctrlr *ctrlr;
	char *path;
	char cwd[PATH_MAX];

	/* always use current directory */
	path = getcwd(cwd, PATH_MAX);
	if (!path) {
		fprintf(stderr, "Cannot get current working directory\n");
		return NULL;
	}
	snprintf(dev_pathname, sizeof(dev_pathname), "%s/%s", path, ctrlr_name);

	/* remove a stale socket file left by a previous run */
	if (access(dev_pathname, F_OK) != -1) {
		if (unlink(dev_pathname) != 0)
			rte_exit(EXIT_FAILURE, "Cannot remove %s.\n",
				 dev_pathname);
	}

	if (rte_vhost_driver_register(dev_pathname, 0) != 0) {
		fprintf(stderr, "socket %s already exists\n", dev_pathname);
		return NULL;
	}

	ret = rte_vhost_driver_set_features(dev_pathname, VHOST_BLK_FEATURES);
	if (ret != 0) {
		fprintf(stderr, "Set vhost driver features failed\n");
		rte_vhost_driver_unregister(dev_pathname);
		return NULL;
	}

	/* set proper features */
	vhost_dev_install_rte_compat_hooks(dev_pathname);

	ctrlr = rte_zmalloc(NULL, sizeof(*ctrlr), RTE_CACHE_LINE_SIZE);
	if (!ctrlr) {
		rte_vhost_driver_unregister(dev_pathname);
		return NULL;
	}

	/* hardcoded block device: 32768 blocks of 4096 bytes = 128 MiB */
	ctrlr->bdev = vhost_blk_bdev_construct("malloc0", "vhost_blk_malloc0",
					       4096, 32768, 0);
	if (!ctrlr->bdev) {
		rte_free(ctrlr);
		rte_vhost_driver_unregister(dev_pathname);
		return NULL;
	}

	rte_vhost_driver_callback_register(dev_pathname,
					   &vhost_blk_device_ops);

	return ctrlr;
}
static void
signal_handler(__rte_unused int signum)
{
	struct vhost_blk_ctrlr *ctrlr;

	if (access(dev_pathname, F_OK) == 0)
		unlink(dev_pathname);

	if (g_should_stop != -1) {
		g_should_stop = 1;
		while (g_should_stop != 2)
			;
	}

	ctrlr = vhost_blk_ctrlr_find(dev_pathname);
	if (ctrlr != NULL) {
		if (ctrlr->bdev != NULL) {
			rte_free(ctrlr->bdev->data);
			rte_free(ctrlr->bdev);
		}
		rte_free(ctrlr);
	}

	rte_vhost_driver_unregister(dev_pathname);
	exit(0);
}
int main(int argc, char *argv[])
{
	int ret;

	signal(SIGINT, signal_handler);

	/* init EAL */
	ret = rte_eal_init(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");

	g_vhost_ctrlr = vhost_blk_ctrlr_construct("vhost.socket");
	if (g_vhost_ctrlr == NULL) {
		fprintf(stderr, "Construct vhost blk controller failed\n");
		return 0;
	}

	if (sem_init(&exit_sem, 0, 0) < 0) {
		fprintf(stderr, "Error init exit_sem\n");
		return -1;
	}

	rte_vhost_driver_start(dev_pathname);

	/* keep the main thread alive; signal_handler() cleans up and exits */
	while (1)
		sleep(1);

	return 0;
}